gbm/inst/doc/oobperf2.eps

[Encapsulated PostScript figure (a WMF-to-EPS conversion of oobperf2.wmf); the PostScript prolog and drawing commands are omitted. The plot shows boxplots of out-of-sample predictive performance, relative to the best method on each of 13 datasets, for four ways of selecting the number of boosting iterations: OOB, Test 33%, Test 20%, and 5-fold CV. Axis labels: "Method for selecting the number of iterations" (x) and "Performance over 13 datasets" (y, scaled 0.2 to 1.0).]

gbm/inst/doc/index.html

[HTML index page listing "Vignettes from package 'gbm'"; navigation markup omitted.]


gbm/inst/doc/oobperf2.pdf

[PDF rendering of the oobperf2 figure; compressed binary streams, embedded font programs, and the cross-reference table are omitted.]

gbm/inst/doc/srcltx.sty

%%
%% This is file `srcltx.sty',
%% generated with the docstrip utility.
%%
%% The original source files were:
%%
%% srcltx.dtx (with options: `package,latex')
%%
%% This package is in the public domain. It comes with no guarantees
%% and no reserved rights. You can use or modify this package at your
%% own risk.
%% Originally written by: Aleksander Simonic
%% Current maintainer: Stefan Ulrich
%%
\NeedsTeXFormat{LaTeX2e}
\ProvidesPackage{srcltx}[2006/11/12 v1.6 Source specials for inverse search in DVI files]
\newif\ifSRCOK \SRCOKtrue
\newif\ifsrc@debug@
\newif\ifsrc@dviwin@
\newif\ifsrc@winedt@\src@winedt@true
\newif\ifsrc@everypar@\src@everypar@true
\newif\ifsrc@everymath@\src@everymath@true
\RequirePackage{ifthen}
\DeclareOption{active}{\SRCOKtrue}
\DeclareOption{inactive}{\SRCOKfalse}
\DeclareOption{nowinedt}{\src@winedt@false}
\DeclareOption{debug}{\src@debug@true}
\DeclareOption{nopar}{\global\src@everypar@false}
\DeclareOption{nomath}{\global\src@everymath@false}
\newcommand*\src@maybe@space{}
\let\src@maybe@space\space
\DeclareOption{dviwin}{\let\src@maybe@space\relax}
\ExecuteOptions{active}
\ProcessOptions
\newcount\src@lastline
\global\src@lastline=-1
\newcommand*\src@debug{}
\def\src@debug#1{\ifsrc@debug@\typeout{DBG: |#1|}\fi}
\newcommand*\MainFile{}
\def\MainFile{\jobname.tex}
\newcommand*\CurrentInput{}
\gdef\CurrentInput{\MainFile}
\newcommand*\WinEdt{}
\def\WinEdt#1{\ifsrc@winedt@\typeout{:#1}\fi}
\newcommand\src@AfterFi{}
\def\src@AfterFi#1\fi{\fi#1}
\AtBeginDocument{%
  \@ifpackageloaded{soul}{%
    \let\src@SOUL@\SOUL@
    \def\SOUL@#1{%
      \ifSRCOK
        \SRCOKfalse\src@SOUL@{#1}\SRCOKtrue
      \else
        \src@AfterFi\src@SOUL@{#1}%
      \fi
    }%
  }{}%
}
\newcommand*\srcIncludeHook[1]{\protected@xdef\CurrentInput{#1.tex}}
\newcommand*\srcInputHook[1]{%
  \src@getfilename@with@ext{#1}%
}
\newcommand*\src@spec{}
\def\src@spec{%
  \ifSRCOK
    \ifnum\inputlineno>\src@lastline
      \global\src@lastline=\inputlineno
      \src@debug{%
        src:\the\inputlineno\src@maybe@space\CurrentInput}%
      \special{src:\the\inputlineno\src@maybe@space\CurrentInput}%
    \fi
  \fi
}
\newcommand\src@before@file@hook{}
\newcommand\src@after@file@hook{}
\def\src@before@file@hook{%
  \WinEdt{<+ \CurrentInput}%
  \global\src@lastline=0
  \ifSRCOK\special{src:1\src@maybe@space\CurrentInput}\fi
}
\def\src@after@file@hook#1{%
  \WinEdt{<-}%
  \global\src@lastline=\inputlineno
  \global\advance\src@lastline by -1%
  \gdef\CurrentInput{#1}%
  \src@spec
}
\newcommand*\src@fname{}%
\newcommand*\src@tempa{}%
\newcommand*\src@extensions@path{}%
\newcommand*\src@getfilename@with@ext{}%
\def\src@extensions@path#1.#2\end{%
  \ifthenelse{\equal{#2}{}}{%
    \protected@edef\src@extensions@last{#1}%
    \let\src@tempa\relax
  }{%
    \def\src@tempa{\src@extensions@path#2\end}%
  }%
  \src@tempa
}
\def\src@getfilename@with@ext#1{%
  \expandafter\src@extensions@path#1.\end
  \ifthenelse{\equal{\src@extensions@last}{tex}}{%
    \protected@xdef\CurrentInput{#1}%
  }{%
    \protected@xdef\CurrentInput{#1.tex}%
  }%
  \PackageInfo{srcltx}{Expanded filename `#1' to `\CurrentInput'}%
}
\newcommand*\src@include{}
\newcommand*\src@@include{}
\let\src@include\include
\def\include#1{%
  \src@spec
  \clearpage
  \expandafter\src@@include\expandafter{\CurrentInput}{#1}%
}%
\def\src@@include#1#2{%
  \srcIncludeHook{#2}%
  \src@before@file@hook
  \src@include{#2}%
  \src@after@file@hook{#1}%
}
\newcommand*\src@input{}
\newcommand*\src@@input{}
\newcommand*\src@@@input{}
\let\src@input\input
\def\input{\src@spec\@ifnextchar\bgroup\src@@input\@@input}%
\def\src@@input#1{%
  \expandafter\src@@@input\expandafter{\CurrentInput}{#1}%
}
\def\src@@@input#1#2{%
  \srcInputHook{#2}%
  \src@before@file@hook
  \src@input{#2}%
  \src@after@file@hook{#1}%
}
\newcommand\Input{}
\let\Input\input
\ifsrc@everypar@
  \newcommand*\src@old@everypar{}
  \let\src@old@everypar\everypar
  \newtoks\src@new@everypar
  \let\everypar\src@new@everypar
  \everypar\expandafter{\the\src@old@everypar}
  \src@old@everypar{\the\src@new@everypar\src@spec}
\fi
\ifsrc@everymath@
  \def\@tempa#1\the\everymath#2\delimiter{{#1\src@spec\the\everymath#2}}
  \frozen@everymath=\expandafter\@tempa\the\frozen@everymath\delimiter
\fi
\newcommand*\src@bibliography{}
\newcommand*\src@@bibliography{}
\let\src@bibliography\bibliography
\def\bibliography#1{%
  \expandafter\src@@bibliography\expandafter{\CurrentInput}{#1}%
}
\def\src@@bibliography#1#2{%
  \protected@xdef\CurrentInput{\jobname.bbl}%
  \src@before@file@hook
  \src@bibliography{#2}%
  \src@after@file@hook{#1}%
}
\newcommand*\src@old@output{}
\let\src@old@output\output
\newtoks\src@new@output
\let\output\src@new@output
\output\expandafter{\the\src@old@output}
\src@old@output{\SRCOKfalse\the\src@new@output}
\endinput
%%
%% End of file `srcltx.sty'.

gbm/inst/doc/shrinkageplot.R

if(FALSE)
{
library(gbm)

# simulate data with a mix of continuous, ordered, and factor predictors
N <- 10000
X1 <- runif(N)
X2 <- 2*runif(N)
X3 <- ordered(sample(letters[1:4],N,replace=TRUE),levels=letters[4:1])
X4 <- factor(sample(letters[1:6],N,replace=TRUE))
X5 <- factor(sample(letters[1:3],N,replace=TRUE))
X6 <- 3*runif(N)
mu <- c(-1,0,1,2)[as.numeric(X3)]

SNR <- 10 # signal-to-noise ratio
Y <- X1**1.5 + 2 * (X2**.5) + mu
sigma <- sqrt(var(Y)/SNR)
Y <- Y + rnorm(N,0,sigma)

# introduce some missing values
X1[sample(1:N,size=500)] <- NA
X4[sample(1:N,size=300)] <- NA

data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6)

# fit a model for each shrinkage value, saving the validation error
shrink <- c(0.1,0.05,0.01,0.005,0.001)
err <- vector("list",length(shrink))
for(i in 1:length(shrink))
{
   gbm1 <- gbm(Y~X1+X2+X3+X4+X5+X6,
               data=data,
               distribution="gaussian",
               n.trees=10000,
               shrinkage=shrink[i],
               interaction.depth=3,
               bag.fraction = 0.5,
               train.fraction = 0.2,
               n.minobsinnode = 10,
               keep.data=FALSE,
               verbose=TRUE)
   err[[i]] <- gbm1$valid.error
}

# plot validation error versus iteration for each shrinkage value
ylim <- range(unlist(lapply(err,range)))
mins <- min(sapply(err,min))
ylim <- c(0.19,0.21)
postscript("shrinkage-v-iterations.eps",horizontal=FALSE,width=9,height=6)
plot(0,0,ylim=ylim,xlim=c(0,10000),type="n",
     xlab="Iterations",ylab="Squared error")
for(i in 1:length(shrink))
{
   x <- which.min(err[[i]])
   y <- err[[i]][x]
   j <- round(seq(1,10000,length=500))
   j <- sort(c(j,x))
   k <- 1:length(j)
   lines(j[k],err[[i]][j][k],col=i)
   rug(x, col=i)
   text(x,y-0.0005,as.character(shrink[i]),adj=1)
}
abline(h=min(mins))
dev.off()
}

gbm/inst/doc/gbm.tex

% setwd("c:/dev/gbm/inst/doc")
% Sweave("gbm.rnw"); system("texify gbm.tex"); system("c:\\MiKTeX\\texmf\\miktex\\bin\\yap.exe gbm.dvi",wait=FALSE)
\documentclass{article}
\bibliographystyle{plain}
\usepackage[active]{srcltx}

\newcommand{\EV}{\mathrm{E}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\aRule}{\begin{center} \rule{5in}{1mm} \end{center}}

\title{Generalized Boosted Models:\\A guide to the gbm package}
\author{Greg Ridgeway}
%\VignetteIndexEntry{Generalized Boosted Models: A guide to the gbm package}
\newcommand{\mathgbf}[1]{{\mbox{\boldmath$#1$\unboldmath}}}

\usepackage{Sweave}
\begin{document}

\maketitle

Boosting takes on various forms with different programs using different loss
functions, different base models, and different optimization schemes. The gbm
package takes the approach described in \cite{Friedman:2001} and
\cite{Friedman:2002}. Some of the terminology differs, mostly due to an
effort to cast boosting terms into more standard statistical terminology
(e.g. deviance).
In addition, the gbm package implements boosting for models commonly used in
statistics but not commonly associated with boosting. The Cox proportional
hazard model, for example, is an incredibly useful model and the boosting
framework applies quite readily with only slight modification
\cite{Ridgeway:1999}. Also, some algorithms implemented in the gbm package
differ from the standard implementation. The AdaBoost algorithm
\cite{FreundSchapire:1997} has a particular loss function and a particular
optimization algorithm associated with it. The gbm implementation of AdaBoost
adopts AdaBoost's exponential loss function (its bound on misclassification
rate) but uses Friedman's gradient descent algorithm rather than the original
one proposed. So the main purpose of this document is to spell out in detail
what the gbm package implements.

\section{Gradient boosting}

This section essentially presents the derivation of boosting described in
\cite{Friedman:2001}. The gbm package also adopts the stochastic gradient
boosting strategy, a small but important tweak on the basic algorithm,
described in \cite{Friedman:2002}.

\subsection{Friedman's gradient boosting machine}
\label{sec:GradientBoostingMachine}

\begin{figure}
\aRule
Initialize $\hat f(\mathbf{x})$ to be a constant,
$\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$. \\
For $t$ in $1,\ldots,T$ do
\begin{enumerate}
\item Compute the negative gradient as the working response
    \begin{equation}
    z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i))
          \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
    \end{equation}
\item Fit a regression model, $g(\mathbf{x})$, predicting $z_i$ from the
      covariates $\mathbf{x}_i$.
\item Choose a gradient descent step size as
    \begin{equation}
    \rho = \arg \min_{\rho} \sum_{i=1}^N
           \Psi(y_i,\hat f(\mathbf{x}_i)+\rho g(\mathbf{x}_i))
    \end{equation}
\item Update the estimate of $f(\mathbf{x})$ as
    \begin{equation}
    \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \rho g(\mathbf{x})
    \end{equation}
\end{enumerate}
\aRule
\caption{Friedman's Gradient Boost algorithm}
\label{fig:GradientBoost}
\end{figure}

Friedman (2001) and the companion paper Friedman (2002) extended the work of
Friedman, Hastie, and Tibshirani (2000) and laid the groundwork for a new
generation of boosting algorithms. Using the connection between boosting and
optimization, this new work proposes the Gradient Boosting Machine.

In any function estimation problem we wish to find a regression function,
$\hat f(\mathbf{x})$, that minimizes the expectation of some loss function,
$\Psi(y,f)$, as shown in (\ref{NonparametricRegression1}).
\begin{eqnarray}
\hspace{0.5in}
\hat f(\mathbf{x}) &=& \arg \min_{f(\mathbf{x})}
    \EV_{y,\mathbf{x}} \Psi(y,f(\mathbf{x})) \nonumber \\
\label{NonparametricRegression1}
&=& \arg \min_{f(\mathbf{x})} \EV_x \left[ \EV_{y|\mathbf{x}}
    \Psi(y,f(\mathbf{x})) \Big| \mathbf{x} \right]
\end{eqnarray}
We will focus on finding estimates of $f(\mathbf{x})$ such that
\begin{equation}
\label{NonparametricRegression2}
\hspace{0.5in}
\hat f(\mathbf{x}) = \arg \min_{f(\mathbf{x})}
    \EV_{y|\mathbf{x}} \left[ \Psi(y,f(\mathbf{x}))|\mathbf{x} \right]
\end{equation}
Parametric regression models assume that $f(\mathbf{x})$ is a function with a
finite number of parameters, $\beta$, and estimates them by selecting those
values that minimize a loss function (e.g. squared error loss) over a
training sample of $N$ observations on $(y,\mathbf{x})$ pairs as in
(\ref{eq:Friedman1}).
\begin{equation}
\label{eq:Friedman1}
\hspace{0.5in}
\hat\beta = \arg \min_{\beta} \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i;\beta))
\end{equation}
When we wish to estimate $f(\mathbf{x})$ non-parametrically the task becomes
more difficult. Again we can proceed similarly to \cite{FHT:2000} and modify
our current estimate of $f(\mathbf{x})$ by adding a new function in a greedy
fashion. Letting $f_i = f(\mathbf{x}_i)$, we see that we want to decrease the
$N$-dimensional function
\begin{eqnarray}
\label{EQ:Friedman2}
\hspace{0.5in}
J(\mathbf{f}) &=& \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) \nonumber \\
              &=& \sum_{i=1}^N \Psi(y_i,f_i).
\end{eqnarray}
The negative gradient of $J(\mathbf{f})$ indicates the direction of the
locally greatest decrease in $J(\mathbf{f})$. Gradient descent would then
have us modify $\mathbf{f}$ as
\begin{equation}
\label{eq:Friedman3}
\hspace{0.5in}
\hat{\mathbf{f}} \leftarrow \hat{\mathbf{f}} - \rho \nabla J(\mathbf{f})
\end{equation}
where $\rho$ is the size of the step along the direction of greatest descent.
Clearly, this step alone is far from our desired goal. First, it only fits
$f$ at values of $\mathbf{x}$ for which we have observations. Second, it does
not take into account that observations with similar $\mathbf{x}$ are likely
to have similar values of $f(\mathbf{x})$. Both these problems would have
disastrous effects on generalization error. However, Friedman suggests
selecting a class of functions that use the covariate information to
approximate the gradient, usually a regression tree. This line of reasoning
produces his Gradient Boosting algorithm shown in
Figure~\ref{fig:GradientBoost}. At each iteration the algorithm determines
the direction, the gradient, in which it needs to improve the fit to the data
and selects a particular model from the allowable class of functions that is
in most agreement with the direction. In the case of squared-error loss,
$\Psi(y_i,f(\mathbf{x}_i)) = (y_i-f(\mathbf{x}_i))^2$, this algorithm
corresponds exactly to residual fitting.

There are various ways to extend and improve upon the basic framework
suggested in Figure~\ref{fig:GradientBoost}. For example, Friedman (2001)
substituted several choices for $\Psi$ to develop new boosting algorithms for
robust regression with least absolute deviation and Huber loss functions.
Friedman (2002) showed that a simple subsampling trick can greatly improve
predictive performance while simultaneously reducing computation time.
Section~\ref{GBMModifications} discusses some of these modifications.

\section{Improving boosting methods using control of the learning rate,
sub-sampling, and a decomposition for interpretation}
\label{GBMModifications}

This section explores the variations of the previous algorithms that have the
potential to improve their predictive performance and interpretability. In
particular, by controlling the optimization speed or learning rate,
introducing low-variance regression methods, and applying ideas from robust
regression we can produce non-parametric regression procedures with many
desirable properties. As a by-product, some of these modifications lead
directly into implementations for learning from massive datasets.

All these methods take advantage of the general form of boosting
\begin{equation}
\hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) +
    \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
\end{equation}
So far we have taken advantage of this form only by substituting in our
favorite regression procedure for $\EV_w(z|\mathbf{x})$.
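
To fix ideas before turning to those modifications, the following toy R
sketch carries out the loop of Figure~\ref{fig:GradientBoost} for
squared-error loss (residual fitting), using \texttt{rpart} stumps as the
base learner and a dampened step as in the next subsection. The simulated
data and all object names here are hypothetical; this is only an
illustration, not gbm's implementation, which is written in C++.
\begin{verbatim}
# Toy gradient boosting for squared-error loss. The working response
# (negative gradient) is the residual, so each iteration fits a stump
# to the current residuals and takes a dampened step toward it.
library(rpart)
set.seed(1)
N <- 1000
x <- runif(N)
y <- sin(2*pi*x) + rnorm(N, 0, 0.3)

lambda <- 0.1               # learning rate
f.hat  <- rep(mean(y), N)   # initial constant fit
for(t in 1:200)
{
   z <- y - f.hat           # negative gradient at the current fit
   g <- rpart(z ~ x, data = data.frame(z = z, x = x),
              control = rpart.control(maxdepth = 1))
   f.hat <- f.hat + lambda*predict(g)
}
mean((y - f.hat)^2)         # training error after 200 iterations
\end{verbatim}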
I will discuss some modifications to estimating $\EV_w(z|\mathbf{x})$ that
have the potential to improve our algorithm.

\subsection{Decreasing the learning rate}

As several authors have phrased slightly differently, ``...boosting, whatever
flavor, seldom seems to overfit, no matter how many terms are included in the
additive expansion''. This is not true, as the discussion of \cite{FHT:2000}
points out. In the update step of any boosting algorithm we can introduce a
learning rate to dampen the proposed move.
\begin{equation}
\label{eq:shrinkage}
\hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) +
    \lambda \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
\end{equation}
By multiplying the gradient step by $\lambda$ as in
equation~\ref{eq:shrinkage} we have control over the rate at which the
boosting algorithm descends the error surface (or ascends the likelihood
surface). When $\lambda=1$ we return to performing full gradient steps.
Friedman (2001) relates the learning rate to regularization through
shrinkage.

The optimal number of iterations, $T$, and the learning rate, $\lambda$,
depend on each other. In practice I set $\lambda$ to be as small as possible
and then select $T$ by cross-validation. Performance is best when $\lambda$
is as small as possible, with decreasing marginal utility for smaller and
smaller $\lambda$. Slower learning rates do not necessarily scale the number
of optimal iterations. That is, if the optimal $T$ is 100 iterations when
$\lambda=1.0$, this does {\it not} necessarily imply that the optimal $T$ is
1000 iterations when $\lambda=0.1$.

\subsection{Variance reduction using subsampling}

Friedman (2002) proposed the stochastic gradient boosting algorithm that
simply samples uniformly without replacement from the dataset before
estimating the next gradient step. He found that this additional step greatly
improved performance. We estimate the regression
$\EV(z(y,\hat f(\mathbf{x}))|\mathbf{x})$ using a random subsample of the
dataset.

\subsection{ANOVA decomposition}

Certain function approximation methods are decomposable in terms of a
``functional ANOVA decomposition''. That is, a function is decomposable as
\begin{equation}
\label{ANOVAdecomp}
f(\mathbf{x}) = \sum_j f_j(x_j) + \sum_{jk} f_{jk}(x_j,x_k) +
    \sum_{jk\ell} f_{jk\ell}(x_j,x_k,x_\ell) + \cdots.
\end{equation}
This applies to boosted trees. Regression stumps (one split decision trees)
depend on only one variable and fall into the first term of
(\ref{ANOVAdecomp}). Trees with two splits fall into the second term of
(\ref{ANOVAdecomp}) and so on. By restricting the depth of the trees produced
on each boosting iteration we can control the order of approximation. Often
additive components are sufficient to approximate a multivariate function
well; generalized additive models, the na\"{\i}ve Bayes classifier, and
boosted stumps are examples. When the approximation is restricted to first
order we can also produce plots of $x_j$ versus $f_j(x_j)$ to demonstrate how
changes in $x_j$ might affect changes in the response variable.

\subsection{Relative influence}

Friedman (2001) also develops an extension of a variable's ``relative
influence'' for boosted estimates. For tree based methods the approximate
relative influence of a variable $x_j$ is
\begin{equation}
\label{RelInfluence}
\hspace{0.5in}
\hat J_j^2 = \hspace{-0.1in}\sum_{\mathrm{splits~on~}x_j}\hspace{-0.2in}I_t^2
\end{equation}
where $I_t^2$ is the empirical improvement by splitting on $x_j$ at that
point. Friedman's extension to boosted models is to average the relative
influence of variable $x_j$ across all the trees generated by the boosting
algorithm.
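
In the gbm package these two interpretive tools are available through
\texttt{summary} and \texttt{plot}. The sketch below assumes a fitted model
object, here called \texttt{gbm1} (such as the hypothetical fit constructed
in the next section); the object name and variable indices are assumptions
for illustration only.
\begin{verbatim}
# Relative influence of each predictor, using the first 1000 trees.
summary(gbm1, n.trees = 1000)

# First-order term f_1(x_1) from the ANOVA decomposition.
plot(gbm1, i.var = 1, n.trees = 1000)

# A second-order term, f_12(x_1,x_2).
plot(gbm1, i.var = c(1,2), n.trees = 1000)
\end{verbatim}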
\begin{figure}
\aRule
Select
\begin{itemize}
\item a loss function (\texttt{distribution})
\item the number of iterations, $T$ (\texttt{n.trees})
\item the depth of each tree, $K$ (\texttt{interaction.depth})
\item the shrinkage (or learning rate) parameter, $\lambda$
      (\texttt{shrinkage})
\item the subsampling rate, $p$ (\texttt{bag.fraction})
\end{itemize}
Initialize $\hat f(\mathbf{x})$ to be a constant,
$\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$ \\
For $t$ in $1,\ldots,T$ do
\begin{enumerate}
\item Compute the negative gradient as the working response
    \begin{equation}
    z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i))
          \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
    \end{equation}
\item Randomly select $p\times N$ cases from the dataset
\item Fit a regression tree with $K$ terminal nodes,
      $g(\mathbf{x})=\EV(z|\mathbf{x})$. This tree is fit using only those
      randomly selected observations
\item Compute the optimal terminal node predictions,
      $\rho_1,\ldots,\rho_K$, as
    \begin{equation}
    \rho_k = \arg \min_{\rho} \sum_{\mathbf{x}_i\in S_k}
             \Psi(y_i,\hat f(\mathbf{x}_i)+\rho)
    \end{equation}
    where $S_k$ is the set of $\mathbf{x}$s that define terminal node $k$.
    Again this step uses only the randomly selected observations.
\item Update $\hat f(\mathbf{x})$ as
    \begin{equation}
    \hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) +
        \lambda\rho_{k(\mathbf{x})}
    \end{equation}
    where $k(\mathbf{x})$ indicates the index of the terminal node into which
    an observation with features $\mathbf{x}$ would fall.
\end{enumerate}
\aRule
\caption{Boosting as implemented in \texttt{gbm()}}
\label{fig:gbm}
\end{figure}

\section{Common user options}

This section discusses the options to gbm that most users will need to change
or tune.

\subsection{Loss function}

The first and foremost choice is \texttt{distribution}. This should be easily
dictated by the application. For most classification problems either
\texttt{bernoulli} or \texttt{adaboost} will be appropriate, the former being
recommended. For continuous outcomes the choices are \texttt{gaussian} (for
minimizing squared error), \texttt{laplace} (for minimizing absolute error),
and quantile regression (for estimating percentiles of the conditional
distribution of the outcome). Censored survival outcomes call for
\texttt{coxph}. Count outcomes may use \texttt{poisson}, although one might
also consider \texttt{gaussian} or \texttt{laplace} depending on the
analytical goals.

\subsection{The relationship between shrinkage and number of iterations}

The issues that most new users of gbm struggle with are the choice of
\texttt{n.trees} and \texttt{shrinkage}. It is important to know that smaller
values of \texttt{shrinkage} (almost) always give improved predictive
performance. That is, setting \texttt{shrinkage=0.001} will almost certainly
result in a model with better out-of-sample predictive performance than
setting \texttt{shrinkage=0.01}. However, there are computational costs, both
storage and CPU time, associated with setting \texttt{shrinkage} to be low.
The model with \texttt{shrinkage=0.001} will likely require ten times as many
iterations as the model with \texttt{shrinkage=0.01}, increasing storage and
computation time by a factor of 10. A call along the lines sketched below
reflects this advice.
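
The following sketch shows a typical call; the data frame \texttt{mydata}
and its variables \texttt{Y}, \texttt{X1}, \ldots, \texttt{X6} are
hypothetical, and the settings echo the recommendations in this section
rather than gbm's defaults.
\begin{verbatim}
library(gbm)
gbm1 <- gbm(Y ~ X1 + X2 + X3 + X4 + X5 + X6,
            data = mydata,
            distribution = "gaussian", # squared-error loss
            n.trees = 5000,            # T, the number of iterations
            shrinkage = 0.01,          # lambda, the learning rate
            interaction.depth = 3,     # K, the depth of each tree
            bag.fraction = 0.5,        # p, the subsampling rate
            train.fraction = 0.5)      # first half used to fit the model
\end{verbatim}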
Figure~\ref{fig:shrinkViters} shows the relationship between predictive
performance, the number of iterations, and the shrinkage parameter. Note that
the increase in the optimal number of iterations between two choices for
shrinkage is roughly equal to the ratio of the shrinkage parameters. It is
generally the case that for small shrinkage parameters, 0.001 for example,
there is a fairly long plateau in which predictive performance is at its
best. My rule of thumb is to set \texttt{shrinkage} as small as possible
while still being able to fit the model in a reasonable amount of time and
storage. I usually aim for 3,000 to 10,000 iterations with shrinkage rates
between 0.01 and 0.001.

\begin{figure}[ht]
\begin{center}
\includegraphics[width=5in]{shrinkage-v-iterations}
\end{center}
\caption{Out-of-sample predictive performance by number of iterations and
shrinkage. Smaller values of the shrinkage parameter offer improved
predictive performance, but with decreasing marginal improvement.}
\label{fig:shrinkViters}
\end{figure}

\subsection{Estimating the optimal number of iterations}

gbm offers three methods for estimating the optimal number of iterations
after the gbm model has been fit: an independent test set (\texttt{test}),
out-of-bag estimation (\texttt{OOB}), and $v$-fold cross validation
(\texttt{cv}). The function \texttt{gbm.perf} computes the iteration
estimate.

Like Friedman's MART software, the independent test set method uses a single
holdout test set to select the optimal number of iterations. If
\texttt{train.fraction} is set to be less than 1, then only the
\textit{first} \texttt{train.fraction}$\times$\texttt{nrow(data)}
observations will be used to fit the model. Note that if the data are sorted
in a systematic way (such as cases for which $y=1$ come first), then the data
should be shuffled before running gbm. Those observations not used in the
model fit can be used to get an unbiased estimate of the optimal number of
iterations. The downside of this method is that a considerable number of
observations are used to estimate the single regularization parameter (number
of iterations) leaving a reduced dataset for estimating the entire
multivariate model structure. Use \texttt{gbm.perf(...,method="test")} to
obtain an estimate of the optimal number of iterations using the held out
test set.

If \texttt{bag.fraction} is set to be greater than 0 (0.5 is recommended),
gbm computes an out-of-bag estimate of the improvement in predictive
performance. It evaluates the reduction in deviance on those observations not
used in selecting the next regression tree. The out-of-bag estimator
underestimates the reduction in deviance. As a result, it almost always is
too conservative in its selection for the optimal number of iterations. The
motivation behind this method was to avoid having to set aside a large
independent dataset, which reduces the information available for learning the
model structure. Use \texttt{gbm.perf(...,method="OOB")} to obtain the OOB
estimate.

Lastly, gbm offers $v$-fold cross validation for estimating the optimal
number of iterations. If \texttt{cv.folds=5} when fitting the gbm model, then
gbm will do 5-fold cross validation. gbm will fit five gbm models in order to
compute the cross validation error estimate and then will fit a sixth and
final gbm model with \texttt{n.trees} iterations using all of the data. The
returned model object will have a component labeled \texttt{cv.error}. Note
that \texttt{gbm.more} will do additional gbm iterations but will not add to
the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to
obtain the cross validation estimate.
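
Continuing the hypothetical fit from the previous section (refit here with
\texttt{cv.folds=5} so that all three estimators are available), the three
estimates might be obtained as follows; this is a sketch, not output from a
real run.
\begin{verbatim}
gbm1 <- gbm(Y ~ X1 + X2 + X3 + X4 + X5 + X6,
            data = mydata, distribution = "gaussian",
            n.trees = 5000, shrinkage = 0.01, interaction.depth = 3,
            bag.fraction = 0.5, train.fraction = 0.5, cv.folds = 5)

best.test <- gbm.perf(gbm1, method = "test") # held-out test set
best.oob  <- gbm.perf(gbm1, method = "OOB")  # out-of-bag (conservative)
best.cv   <- gbm.perf(gbm1, method = "cv")   # 5-fold cross validation
\end{verbatim}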
Note that \texttt{gbm.more} will do additional gbm iterations but will not add to the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to obtain the cross validation estimate. \begin{figure}[ht] \begin{center} \includegraphics[width=5in]{oobperf2} \end{center} \caption{Out-of-sample predictive performance of four methods of selecting the optimal number of iterations. The vertical axis plots performance relative the best. The boxplots indicate relative performance across thirteen real datasets from the UCI repository. See \texttt{demo(OOB-reps)}.} \label{fig:oobperf} \end{figure} Figure~\ref{fig:oobperf} compares the three methods for estimating the optimal number of iterations across 13 datasets. The boxplots show the methods performance relative to the best method on that dataset. For most datasets the method perform similarly, however, 5-fold cross validation is consistently the best of them. OOB, using a 33\% test set, and using a 20\% test set all have datasets for which the perform considerably worse than the best method. My recommendation is to use 5- or 10-fold cross validation if you can afford the computing time. Otherwise you may choose among the other options, knowing that OOB is conservative. \section{Available distributions} This section gives some of the mathematical detail for each of the distribution options that gbm offers. The gbm engine written in C++ has access to a C++ class for each of these distributions. Each class contains methods for computing the associated deviance, initial value, the gradient, and the constants to predict in each terminal node. In the equations shown below, for non-zero offset terms, replace $f(\mathbf{x}_i)$ with $o_i + f(\mathbf{x}_i)$. \subsection{Gaussian} \begin{tabular}{ll} Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i(y_i-f(\mathbf{x}_i))^2$ \\ Initial value & $\displaystyle f(\mathbf{x})=\frac{\sum w_i(y_i-o_i)}{\sum w_i}$ \\ Gradient & $z_i=y_i - f(\mathbf{x}_i)$ \\ Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-f(\mathbf{x}_i))}{\sum w_i}$ \end{tabular} \subsection{AdaBoost} \begin{tabular}{ll} Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Initial value & $\displaystyle \frac{1}{2}\log\frac{\sum y_iw_ie^{-o_i}}{\sum (1-y_i)w_ie^{o_i}}$ \\ Gradient & $\displaystyle z_i= -(2y_i-1)\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\ Terminal node estimates & $\displaystyle \frac{\sum (2y_i-1)w_i\exp(-(2y_i-1)f(\mathbf{x}_i))} {\sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}$ \end{tabular} \subsection{Bernoulli} \begin{tabular}{ll} Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\log(1+\exp(f(\mathbf{x}_i))))$ \\ Initial value & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i(1-y_i)}$ \\ Gradient & $\displaystyle z_i=y_i-\frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\ Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ \\ & where $\displaystyle p_i = \frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\ \end{tabular} Notes: \begin{itemize} \item For non-zero offset terms, the computation of the initial value requires Newton-Raphson. Initialize $f_0=0$ and iterate $\displaystyle f_0 \leftarrow f_0 + \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ where $\displaystyle p_i = \frac{1}{1+\exp(-(o_i+f_0))}$. 
\end{itemize} \subsection{Laplace} \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i} \sum w_i|y_i-f(\mathbf{x}_i)|$ \\ Initial value & $\mbox{median}_w(y)$ \\ Gradient & $z_i=\mbox{sign}(y_i-f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mbox{median}_w(z)$ \end{tabular} Notes: \begin{itemize} \item $\mbox{median}_w(y)$ denotes the weighted median, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq m)}{\sum w_i}=\frac{1}{2}$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution="laplace"}. \end{itemize} \subsection{Quantile regression} Contributed by Brian Kriegler (see \cite{Kriegler:2010}). \begin{tabular}{ll} Deviance & $\frac{1}{\sum w_i} \left(\alpha\sum_{y_i>f(\mathbf{x}_i)} w_i(y_i-f(\mathbf{x}_i))\right. +$ \\ & \hspace{0.5in}$\left.(1-\alpha)\sum_{y_i\leq f(\mathbf{x}_i)} w_i(f(\mathbf{x}_i)-y_i)\right)$ \\ Initial value & $\mathrm{quantile}^{(\alpha)}_w(y)$ \\ Gradient & $z_i=\alpha I(y_i>f(\mathbf{x}_i))-(1-\alpha)I(y_i\leq f(\mathbf{x}_i))$ \\ Terminal node estimates & $\mathrm{quantile}^{(\alpha)}_w(z)$ \end{tabular} Notes: \begin{itemize} \item $\mathrm{quantile}^{(\alpha)}_w(y)$ denotes the weighted quantile, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq q)}{\sum w_i}=\alpha$ \item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution=list(name="quantile")}. \end{itemize} \subsection{Cox Proportional Hazard} \begin{tabular}{ll} Deviance & $-2\sum w_i(\delta_i(f(\mathbf{x}_i)-\log(R_i/w_i)))$\\ Gradient & $\displaystyle z_i=\delta_i - \sum_j \delta_j \frac{w_jI(t_i\geq t_j)e^{f(\mathbf{x}_i)}} {\sum_k w_kI(t_k\geq t_j)e^{f(\mathbf{x}_k)}}$ \\ Initial value & 0 \\ Terminal node estimates & Newton-Raphson algorithm \end{tabular} \begin{enumerate} \item Initialize the terminal node predictions to 0, $\mathgbf{\rho}=0$ \item Let $\displaystyle p_i^{(k)}=\frac{\sum_j I(k(j)=k)I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}} {\sum_j I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}$ \item Let $g_k=\sum w_i\delta_i\left(I(k(i)=k)-p_i^{(k)}\right)$ \item Let $\mathbf{H}$ be a $k\times k$ matrix with diagonal elements \begin{enumerate} \item Set diagonal elements $H_{mm}=\sum w_i\delta_i p_i^{(m)}\left(1-p_i^{(m)}\right)$ \item Set off diagonal elements $H_{mn}=-\sum w_i\delta_i p_i^{(m)}p_i^{(n)}$ \end{enumerate} \item Newton-Raphson update $\mathgbf{\rho} \leftarrow \mathgbf{\rho} - \mathbf{H}^{-1}\mathbf{g}$ \item Return to step 2 until convergence \end{enumerate} Notes: \begin{itemize} \item $t_i$ is the survival time and $\delta_i$ is the death indicator. \item $R_i$ denotes the hazard for the risk set, $R_i=\sum_{j=1}^N w_jI(t_j\geq t_i)e^{f(\mathbf{x}_i)}$ \item $k(i)$ indexes the terminal node of observation $i$ \item For speed, \texttt{gbm()} does only one step of the Newton-Raphson algorithm rather than iterating to convergence. No appreciable loss of accuracy since the next boosting iteration will simply correct for the prior iterations inadequacy. \item \texttt{gbm()} initially sorts the data by survival time. Doing this reduces the computation of the risk set from $O(n^2)$ to $O(n)$ at the cost of a single up front sort on survival time. After the model is fit, the data are then put back in their original order. 
\end{itemize} \subsection{Poisson} \begin{tabular}{ll} Deviance & -2$\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\exp(f(\mathbf{x}_i)))$ \\ Initial value & $\displaystyle f(\mathbf{x})= \log\left(\frac{\sum w_iy_i}{\sum w_ie^{o_i}}\right)$ \\ Gradient & $z_i=y_i - \exp(f(\mathbf{x}_i))$ \\ Terminal node estimates & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i\exp(f(\mathbf{x}_i))}$ \end{tabular} The Poisson class includes special safeguards so that the most extreme predicted values are $e^{-19}$ and $e^{+19}$. This behavior is consistent with \texttt{glm()}. \subsection{Pairwise} This distribution implements ranking measures following the \emph{LambdaMart} algorithm \cite{Burges:2010}. Instances belong to \emph{groups}; all pairs of items with different labels, belonging to the same group, are used for training. In \emph{Information Retrieval} applications, groups correspond to user queries, and items to (feature vectors of) documents in the associated match set to be ranked. For consistency with typical usage, our goal is to \emph{maximize} one of the \emph{utility} functions listed below. Consider a group with instances $x_1, \dots, x_n$, ordered such that $f(x_1) \geq f(x_2) \geq \dots f(x_n)$; i.e., the \emph{rank} of $x_i$ is $i$, where smaller ranks are preferable. Let $P$ be the set of all ordered pairs such that $y_i > y_j$. \begin{enumerate} \item[{\bf Concordance:}] Fraction of concordant (i.e, correctly ordered) pairs. For the special case of binary labels, this is equivalent to the Area under the ROC Curve. $$\left\{ \begin{array}{l l}\frac{\|\{(i,j)\in P | f(x_i)>f(x_j)\}\|}{\|P\|} & P \neq \emptyset\\ 0 & \mbox{otherwise.} \end{array}\right. $$ \item[{\bf MRR:}] Mean reciprocal rank of the highest-ranked positive instance (it is assumed $y_i\in\{0,1\}$): $$\left\{ \begin{array}{l l}\frac{1}{\min\{1 \leq i \leq n |y_i=1\}} & \exists i: \, 1 \leq i \leq n, y_i=1\\ 0 & \mbox{otherwise.}\end{array}\right.$$ \item[{\bf MAP:}] Mean average precision, a generalization of MRR to multiple positive instances: $$\left\{ \begin{array}{l l} \frac{\sum_{1\leq i\leq n | y_i=1} \|\{1\leq j\leq i |y_j=1\}\|\,/\,i}{\|\{1\leq i\leq n | y_i=1\}\|} & \exists i: \, 1 \leq i \leq n, y_i=1\\ 0 & \mbox{otherwise.}\end{array}\right.$$ \item[{\bf nDCG:}] Normalized discounted cumulative gain: $$\frac{\sum_{1\leq i\leq n} \log_2(i+1) \, y_i}{\sum_{1\leq i\leq n} \log_2(i+1) \, y'_i},$$ where $y'_1, \dots, y'_n$ is a reordering of $y_1, \dots,y_n$ with $y'_1 \geq y'_2 \geq \dots \geq y'_n$. \end{enumerate} The generalization to multiple (possibly weighted) groups is straightforward. Sometimes a cut-off rank $k$ is given for \emph{MRR} and \emph{nDCG}, in which case we replace the outer index $n$ by $\min(n,k)$. The initial value for $f(x_i)$ is always zero. We derive the gradient of a cost function whose gradient locally approximates the gradient of the IR measure for a fixed ranking: \begin{eqnarray*} \Phi & = & \sum_{(i,j) \in P} \Phi_{ij}\\ & = & \sum_{(i,j) \in P} |\Delta Z_{ij}| \log \left( 1 + e^{-(f(x_i) - f(x_j))}\right), \end{eqnarray*} where $|\Delta Z_{ij}|$ is the absolute utility difference when swapping the ranks of $i$ and $j$, while leaving all other instances the same. 
Define \begin{eqnarray*} \lambda_{ij} & = & \frac{\partial\Phi_{ij}}{\partial f(x_i)}\\ & = & - |\Delta Z_{ij}| \frac{1}{1 + e^{f(x_i) - f(x_j)}}\\ & = & - |\Delta Z_{ij}| \, \rho_{ij}, \end{eqnarray*} with $$ \rho_{ij} = - \frac{\lambda_{ij }}{|\Delta Z_{ij}|} = \frac{1}{1 + e^{f(x_i) - f(x_j)}}$$ For the gradient of $\Phi$ with respect to $f(x_i)$, define \begin{eqnarray*} \lambda_i & = & \frac{\partial \Phi}{\partial f(x_i)}\\ & = & \sum_{j|(i,j) \in P} \lambda_{ij} - \sum_{j|(j,i) \in P} \lambda_{ji}\\ & = & - \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij}\\ & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji}. \end{eqnarray*} The second derivative is \begin{eqnarray*} \gamma_i & \stackrel{def}{=} & \frac{\partial^2\Phi}{\partial f(x_i)^2}\\ & = & \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij} \, (1-\rho_{ij})\\ & & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji} \, (1-\rho_{ji}). \end{eqnarray*} Now consider again all groups with associated weights. For a given terminal node, let $i$ range over all contained instances. Then its estimate is $$-\frac{\sum_i v_i\lambda_{i}}{\sum_i v_i \gamma_i},$$ where $v_i=w(\mbox{\em group}(i))/\|\{(j,k)\in\mbox{\em group}(i)\}\|.$ In each iteration, instances are reranked according to the preliminary scores $f(x_i)$ to determine the $|\Delta Z_{ij}|$. Note that in order to avoid ranking bias, we break ties by adding a small amount of random noise. \begin{thebibliography}{77} % start the bibliography \small % put the bibliography in a small font \bibitem{FreundSchapire:1997} Y. Freund and R.E. Schapire (1997). ``A decision-theoretic generalization of on-line learning and an application to boosting,'' \textit{Journal of Computer and System Sciences}, 55(1):119-139. \bibitem{Friedman:2001} J.H. Friedman (2001). ``Greedy Function Approximation: A Gradient Boosting Machine,'' \textit{Annals of Statistics} 29(5):1189-1232. \bibitem{Friedman:2002} J.H. Friedman (2002). ``Stochastic Gradient Boosting,'' \textit{Computational Statistics and Data Analysis} 38(4):367-378. \bibitem{FHT:2000} J.H. Friedman, T. Hastie, R. Tibshirani (2000). ``Additive Logistic Regression: a Statistical View of Boosting,'' \textit{Annals of Statistics} 28(2):337-374. \bibitem{Kriegler:2010} B. Kriegler and R. Berk (2010). ``Small Area Estimation of the Homeless in Los Angeles, An Application of Cost-Sensitive Stochastic Gradient Boosting,'' \textit{Annals of Applied Statistics} 4(3):1234-1255. \bibitem{Ridgeway:1999} G. Ridgeway (1999). ``The state of boosting,'' \textit{Computing Science and Statistics} 31:172-181. \bibitem{Burges:2010} C. Burges (2010). 
gbm/inst/doc/gbm.Sweave0000644000176000001440000007564512134211007014522 0ustar ripleyusers% setwd("c:/dev/gbm/inst/doc")
% Sweave("gbm.rnw"); system("texify gbm.tex"); system("c:\\MiKTeX\\texmf\\miktex\\bin\\yap.exe gbm.dvi",wait=FALSE)
\documentclass{article}
\bibliographystyle{plain}
\usepackage[active]{srcltx}
\newcommand{\EV}{\mathrm{E}}
\newcommand{\Var}{\mathrm{Var}}
\newcommand{\aRule}{\begin{center} \rule{5in}{1mm} \end{center}}
\title{Generalized Boosted Models:\\A guide to the gbm package}
\author{Greg Ridgeway}
%\VignetteIndexEntry{Generalized Boosted Models: A guide to the gbm package}
\newcommand{\mathgbf}[1]{{\mbox{\boldmath$#1$\unboldmath}}}
\begin{document}
\maketitle

Boosting takes on various forms with different programs using different loss functions, different base models, and different optimization schemes. The gbm package takes the approach described in \cite{Friedman:2001} and \cite{Friedman:2002}. Some of the terminology differs, mostly due to an effort to cast boosting terms into more standard statistical terminology (e.g. deviance). In addition, the gbm package implements boosting for models commonly used in statistics but not commonly associated with boosting. The Cox proportional hazard model, for example, is an incredibly useful model and the boosting framework applies quite readily with only slight modification \cite{Ridgeway:1999}. Also, some algorithms implemented in the gbm package differ from the standard implementation. The AdaBoost algorithm \cite{FreundSchapire:1997} has a particular loss function and a particular optimization algorithm associated with it. The gbm implementation of AdaBoost adopts AdaBoost's exponential loss function (its bound on misclassification rate) but uses Friedman's gradient descent algorithm rather than the one originally proposed. So the main purpose of this document is to spell out in detail what the gbm package implements.

\section{Gradient boosting}

This section essentially presents the derivation of boosting described in \cite{Friedman:2001}. The gbm package also adopts the stochastic gradient boosting strategy, a small but important tweak on the basic algorithm, described in \cite{Friedman:2002}.

\subsection{Friedman's gradient boosting machine}
\label{sec:GradientBoostingMachine}

\begin{figure}
\aRule
Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$. \\
For $t$ in $1,\ldots,T$ do
\begin{enumerate}
\item Compute the negative gradient as the working response
\begin{equation}
z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
\end{equation}
\item Fit a regression model, $g(\mathbf{x})$, predicting $z_i$ from the covariates $\mathbf{x}_i$.
\item Choose a gradient descent step size as
\begin{equation}
\rho = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\hat f(\mathbf{x}_i)+\rho g(\mathbf{x}_i))
\end{equation}
\item Update the estimate of $f(\mathbf{x})$ as
\begin{equation}
\hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \rho g(\mathbf{x})
\end{equation}
\end{enumerate}
\aRule
\caption{Friedman's Gradient Boost algorithm}
\label{fig:GradientBoost}
\end{figure}

Friedman (2001) and the companion paper Friedman (2002) extended the work of Friedman, Hastie, and Tibshirani (2000) and laid the groundwork for a new generation of boosting algorithms. Using the connection between boosting and optimization, this new work proposes the Gradient Boosting Machine.

In any function estimation problem we wish to find a regression function, $\hat f(\mathbf{x})$, that minimizes the expectation of some loss function, $\Psi(y,f)$, as shown in (\ref{NonparametricRegression1}).
\begin{eqnarray}
\hspace{0.5in}
\hat f(\mathbf{x}) &=& \arg \min_{f(\mathbf{x})} \EV_{y,\mathbf{x}} \Psi(y,f(\mathbf{x})) \nonumber \\
\label{NonparametricRegression1}
&=& \arg \min_{f(\mathbf{x})} \EV_x \left[ \EV_{y|\mathbf{x}} \Psi(y,f(\mathbf{x})) \Big| \mathbf{x} \right]
\end{eqnarray}

We will focus on finding estimates of $f(\mathbf{x})$ such that
\begin{equation}
\label{NonparametricRegression2}
\hspace{0.5in}
\hat f(\mathbf{x}) = \arg \min_{f(\mathbf{x})} \EV_{y|\mathbf{x}} \left[ \Psi(y,f(\mathbf{x}))|\mathbf{x} \right]
\end{equation}
Parametric regression models assume that $f(\mathbf{x})$ is a function with a finite number of parameters, $\beta$, and estimates them by selecting those values that minimize a loss function (e.g. squared error loss) over a training sample of $N$ observations on $(y,\mathbf{x})$ pairs as in (\ref{eq:Friedman1}).
\begin{equation}
\label{eq:Friedman1}
\hspace{0.5in}
\hat\beta = \arg \min_{\beta} \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i;\beta))
\end{equation}
When we wish to estimate $f(\mathbf{x})$ non-parametrically the task becomes more difficult. Again we can proceed similarly to \cite{FHT:2000} and modify our current estimate of $f(\mathbf{x})$ by adding a new function in a greedy fashion. Letting $f_i = f(\mathbf{x}_i)$, we see that we want to decrease the $N$-dimensional function
\begin{eqnarray}
\label{EQ:Friedman2}
\hspace{0.5in}
J(\mathbf{f}) &=& \sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) \nonumber \\
&=& \sum_{i=1}^N \Psi(y_i,f_i).
\end{eqnarray}
The negative gradient of $J(\mathbf{f})$ indicates the direction of the locally greatest decrease in $J(\mathbf{f})$. Gradient descent would then have us modify $\mathbf{f}$ as
\begin{equation}
\label{eq:Friedman3}
\hspace{0.5in}
\hat \mathbf{f} \leftarrow \hat \mathbf{f} - \rho \nabla J(\mathbf{f})
\end{equation}
where $\rho$ is the size of the step along the direction of greatest descent. Clearly, this step alone is far from our desired goal. First, it only fits $f$ at values of $\mathbf{x}$ for which we have observations. Second, it does not take into account that observations with similar $\mathbf{x}$ are likely to have similar values of $f(\mathbf{x})$. Both these problems would have disastrous effects on generalization error. However, Friedman suggests selecting a class of functions that use the covariate information to approximate the gradient, usually a regression tree. This line of reasoning produces his Gradient Boosting algorithm shown in Figure~\ref{fig:GradientBoost}.
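To make the recipe concrete, the following is a minimal R sketch of the loop in Figure~\ref{fig:GradientBoost} for squared-error loss, using \texttt{rpart} stumps as the base learner. The simulated data and all object names are illustrative assumptions; this is a sketch, not the \texttt{gbm()} engine, which uses its own C++ trees.
\begin{verbatim}
# Minimal sketch of Friedman's Gradient Boost for squared-error loss
# (illustrative only, not gbm()'s implementation).
library(rpart)

set.seed(1)
N <- 1000
d <- data.frame(x = runif(N))
y <- sin(2 * pi * d$x) + rnorm(N, sd = 0.3)

f.hat <- rep(mean(y), N)   # initial constant minimizes sum (y_i - rho)^2
for (t in 1:100) {
  z <- y - f.hat           # negative gradient of squared-error loss
  g <- rpart(z ~ x, data = data.frame(d, z = z),
             control = rpart.control(maxdepth = 1))  # regression stump
  # For squared error the line search gives rho = 1: each terminal node
  # mean of z already minimizes the loss within that node.
  f.hat <- f.hat + predict(g, newdata = d)
}
mean((y - f.hat)^2)        # training error after T = 100 iterations
\end{verbatim}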
At each iteration the algorithm determines the direction, the gradient, in which it needs to improve the fit to the data and selects a particular model from the allowable class of functions that is most in agreement with the direction. In the case of squared-error loss, $\sum_{i=1}^N \Psi(y_i,f(\mathbf{x}_i)) = \sum_{i=1}^N (y_i-f(\mathbf{x}_i))^2$, this algorithm corresponds exactly to residual fitting.

There are various ways to extend and improve upon the basic framework suggested in Figure~\ref{fig:GradientBoost}. For example, Friedman (2001) substituted several choices for $\Psi$ to develop new boosting algorithms for robust regression with least absolute deviation and Huber loss functions. Friedman (2002) showed that a simple subsampling trick can greatly improve predictive performance while simultaneously reducing computation time. Section~\ref{GBMModifications} discusses some of these modifications.

\section{Improving boosting methods using control of the learning rate, sub-sampling, and a decomposition for interpretation}
\label{GBMModifications}

This section explores the variations of the previous algorithms that have the potential to improve their predictive performance and interpretability. In particular, by controlling the optimization speed or learning rate, introducing low-variance regression methods, and applying ideas from robust regression we can produce non-parametric regression procedures with many desirable properties. As a by-product some of these modifications lead directly into implementations for learning from massive datasets. All these methods take advantage of the general form of boosting
\begin{equation}
\hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
\end{equation}
So far we have taken advantage of this form only by substituting in our favorite regression procedure for $\EV_w(z|\mathbf{x})$. I will discuss some modifications to estimating $\EV_w(z|\mathbf{x})$ that have the potential to improve our algorithm.

\subsection{Decreasing the learning rate}

As several authors have phrased slightly differently, ``...boosting, whatever flavor, seldom seems to overfit, no matter how many terms are included in the additive expansion''. This is not true as the discussion to \cite{FHT:2000} points out.

In the update step of any boosting algorithm we can introduce a learning rate to dampen the proposed move.
\begin{equation}
\label{eq:shrinkage}
\hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda \EV(z(y,\hat f(\mathbf{x}))|\mathbf{x}).
\end{equation}
By multiplying the gradient step by $\lambda$ as in equation~\ref{eq:shrinkage} we have control over the rate at which the boosting algorithm descends the error surface (or ascends the likelihood surface). When $\lambda=1$ we return to performing full gradient steps. Friedman (2001) relates the learning rate to regularization through shrinkage.

The optimal number of iterations, $T$, and the learning rate, $\lambda$, depend on each other. In practice I set $\lambda$ to be as small as possible and then select $T$ by cross-validation. Performance is best when $\lambda$ is as small as possible, with decreasing marginal utility for smaller and smaller $\lambda$. Slower learning rates do not necessarily scale the number of optimal iterations. That is, if the optimal $T$ is 100 iterations when $\lambda=1.0$, it does {\it not} necessarily follow that the optimal $T$ is 1000 iterations when $\lambda=0.1$.
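The dampened update of equation~\ref{eq:shrinkage} is a one-line change to the earlier sketch (again illustrative only, reusing the simulated objects defined there):
\begin{verbatim}
# The same loop with a learning rate lambda (gbm's `shrinkage' argument).
# Smaller lambda requires more iterations but typically generalizes better.
lambda <- 0.01
f.hat  <- rep(mean(y), N)
for (t in 1:2000) {
  z <- y - f.hat
  g <- rpart(z ~ x, data = data.frame(d, z = z),
             control = rpart.control(maxdepth = 1))
  f.hat <- f.hat + lambda * predict(g, newdata = d)   # dampened step
}
\end{verbatim}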
\subsection{Variance reduction using subsampling}

Friedman (2002) proposed the stochastic gradient boosting algorithm that simply samples uniformly without replacement from the dataset before estimating the next gradient step. He found that this additional step greatly improved performance. We estimate the regression $\EV(z(y,\hat f(\mathbf{x}))|\mathbf{x})$ using a random subsample of the dataset.

\subsection{ANOVA decomposition}

Certain function approximation methods are decomposable in terms of a ``functional ANOVA decomposition''. That is, a function is decomposable as
\begin{equation}
\label{ANOVAdecomp}
f(\mathbf{x}) = \sum_j f_j(x_j) + \sum_{jk} f_{jk}(x_j,x_k) + \sum_{jk\ell} f_{jk\ell}(x_j,x_k,x_\ell) + \cdots.
\end{equation}
This applies to boosted trees. Regression stumps (one-split decision trees) depend on only one variable and fall into the first term of (\ref{ANOVAdecomp}). Trees with two splits fall into the second term of (\ref{ANOVAdecomp}) and so on. By restricting the depth of the trees produced on each boosting iteration we can control the order of approximation. Often additive components are sufficient to approximate a multivariate function well; generalized additive models, the na\"{\i}ve Bayes classifier, and boosted stumps are examples. When the approximation is restricted to first order we can also produce plots of $x_j$ versus $f_j(x_j)$ to demonstrate how changes in $x_j$ might affect changes in the response variable.

\subsection{Relative influence}

Friedman (2001) also develops an extension of a variable's ``relative influence'' for boosted estimates. For tree-based methods the approximate relative influence of a variable $x_j$ is
\begin{equation}
\label{RelInfluence}
\hspace{0.5in}
\hat J_j^2 = \hspace{-0.1in}\sum_{\mathrm{splits~on~}x_j}\hspace{-0.2in}I_t^2
\end{equation}
where $I_t^2$ is the empirical improvement by splitting on $x_j$ at that point. Friedman's extension to boosted models is to average the relative influence of variable $x_j$ across all the trees generated by the boosting algorithm.

\begin{figure}
\aRule
Select
\begin{itemize}
\item a loss function (\texttt{distribution})
\item the number of iterations, $T$ (\texttt{n.trees})
\item the depth of each tree, $K$ (\texttt{interaction.depth})
\item the shrinkage (or learning rate) parameter, $\lambda$ (\texttt{shrinkage})
\item the subsampling rate, $p$ (\texttt{bag.fraction})
\end{itemize}
Initialize $\hat f(\mathbf{x})$ to be a constant, $\hat f(\mathbf{x}) = \arg \min_{\rho} \sum_{i=1}^N \Psi(y_i,\rho)$ \\
For $t$ in $1,\ldots,T$ do
\begin{enumerate}
\item Compute the negative gradient as the working response
\begin{equation}
z_i = -\frac{\partial}{\partial f(\mathbf{x}_i)} \Psi(y_i,f(\mathbf{x}_i)) \mbox{\Huge $|$}_{f(\mathbf{x}_i)=\hat f(\mathbf{x}_i)}
\end{equation}
\item Randomly select $p\times N$ cases from the dataset
\item Fit a regression tree with $K$ terminal nodes, $g(\mathbf{x})=\EV(z|\mathbf{x})$. This tree is fit using only those randomly selected observations
\item Compute the optimal terminal node predictions, $\rho_1,\ldots,\rho_K$, as
\begin{equation}
\rho_k = \arg \min_{\rho} \sum_{\mathbf{x}_i\in S_k} \Psi(y_i,\hat f(\mathbf{x}_i)+\rho)
\end{equation}
where $S_k$ is the set of $\mathbf{x}$s that define terminal node $k$. Again this step uses only the randomly selected observations.
\item Update $\hat f(\mathbf{x})$ as
\begin{equation}
\hat f(\mathbf{x}) \leftarrow \hat f(\mathbf{x}) + \lambda\rho_{k(\mathbf{x})}
\end{equation}
where $k(\mathbf{x})$ indicates the index of the terminal node into which an observation with features $\mathbf{x}$ would fall.
\end{enumerate}
\aRule
\caption{Boosting as implemented in \texttt{gbm()}}
\label{fig:gbm}
\end{figure}

\section{Common user options}

This section discusses the options to gbm that most users will need to change or tune.

\subsection{Loss function}

The first and foremost choice is \texttt{distribution}. This choice should be dictated by the application. For most classification problems either \texttt{bernoulli} or \texttt{adaboost} will be appropriate, the former being recommended. For continuous outcomes the choices are \texttt{gaussian} (for minimizing squared error), \texttt{laplace} (for minimizing absolute error), and quantile regression (for estimating percentiles of the conditional distribution of the outcome). Censored survival outcomes require \texttt{coxph}. Count outcomes may use \texttt{poisson}, although one might also consider \texttt{gaussian} or \texttt{laplace} depending on the analytical goals.

\subsection{The relationship between shrinkage and number of iterations}

The issues that most new users of gbm struggle with are the choice of \texttt{n.trees} and \texttt{shrinkage}. It is important to know that smaller values of \texttt{shrinkage} (almost) always give improved predictive performance. That is, setting \texttt{shrinkage=0.001} will almost certainly result in a model with better out-of-sample predictive performance than setting \texttt{shrinkage=0.01}. However, there are computational costs, both storage and CPU time, associated with setting \texttt{shrinkage} to be low. The model with \texttt{shrinkage=0.001} will likely require ten times as many iterations as the model with \texttt{shrinkage=0.01}, increasing storage and computation time by a factor of 10.

Figure~\ref{fig:shrinkViters} shows the relationship between predictive performance, the number of iterations, and the shrinkage parameter. Note that the increase in the optimal number of iterations between two choices for shrinkage is roughly equal to the ratio of the shrinkage parameters. It is generally the case that for small shrinkage parameters, 0.001 for example, there is a fairly long plateau in which predictive performance is at its best. My rule of thumb is to set \texttt{shrinkage} as small as possible while still being able to fit the model in a reasonable amount of time and storage. I usually aim for 3,000 to 10,000 iterations with shrinkage rates between 0.01 and 0.001.

\begin{figure}[ht]
\begin{center}
\includegraphics[width=5in]{shrinkage-v-iterations}
\end{center}
\caption{Out-of-sample predictive performance by number of iterations and shrinkage. Smaller values of the shrinkage parameter offer improved predictive performance, but with decreasing marginal improvement.}
\label{fig:shrinkViters}
\end{figure}
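For concreteness, here is a hypothetical call that sets the options of Figure~\ref{fig:gbm} on simulated data. The dataset, variable names, and parameter values are illustrative assumptions, not recommendations for any particular problem; \texttt{summary()} reports the relative influence measure of Section~\ref{GBMModifications} and \texttt{plot()} displays a first-order term of the ANOVA decomposition.
\begin{verbatim}
# Hypothetical gbm() call on simulated data, setting the options of
# Figure 2: distribution, n.trees, interaction.depth, shrinkage,
# bag.fraction.
library(gbm)

set.seed(2)
N <- 2000
d <- data.frame(x1 = runif(N), x2 = runif(N),
                x3 = factor(sample(letters[1:4], N, replace = TRUE)))
d$y <- sin(2 * pi * d$x1) + d$x2^2 + rnorm(N, sd = 0.3)

fit <- gbm(y ~ x1 + x2 + x3, data = d,
           distribution = "gaussian",  # squared-error loss
           n.trees = 3000,             # T
           interaction.depth = 2,      # K
           shrinkage = 0.01,           # lambda
           bag.fraction = 0.5)         # p

summary(fit)          # relative influence of x1, x2, x3
plot(fit, i.var = 1)  # marginal (first-order) effect of x1
\end{verbatim}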
\subsection{Estimating the optimal number of iterations}

gbm offers three methods for estimating the optimal number of iterations after the gbm model has been fit: an independent test set (\texttt{test}), out-of-bag estimation (\texttt{OOB}), and $v$-fold cross validation (\texttt{cv}). The function \texttt{gbm.perf} computes the iteration estimate.

Like Friedman's MART software, the independent test set method uses a single holdout test set to select the optimal number of iterations. If \texttt{train.fraction} is set to be less than 1, then only the \textit{first} \texttt{train.fraction}$\times$\texttt{nrow(data)} observations will be used to fit the model. Note that if the data are sorted in a systematic way (such as cases for which $y=1$ come first), then the data should be shuffled before running gbm. Those observations not used in the model fit can be used to get an unbiased estimate of the optimal number of iterations. The downside of this method is that a considerable number of observations are used to estimate the single regularization parameter (number of iterations) leaving a reduced dataset for estimating the entire multivariate model structure. Use \texttt{gbm.perf(...,method="test")} to obtain an estimate of the optimal number of iterations using the held out test set.

If \texttt{bag.fraction} is set to be greater than 0 (0.5 is recommended), gbm computes an out-of-bag estimate of the improvement in predictive performance. It evaluates the reduction in deviance on those observations not used in selecting the next regression tree. The out-of-bag estimator underestimates the reduction in deviance. As a result, it is almost always too conservative in its selection of the optimal number of iterations. The motivation behind this method was to avoid having to set aside a large independent dataset, which reduces the information available for learning the model structure. Use \texttt{gbm.perf(...,method="OOB")} to obtain the OOB estimate.

Lastly, gbm offers $v$-fold cross validation for estimating the optimal number of iterations. If \texttt{cv.folds=5} when fitting the gbm model, then gbm will do 5-fold cross validation. gbm will fit five gbm models in order to compute the cross validation error estimate and then will fit a sixth and final gbm model with \texttt{n.trees} iterations using all of the data. The returned model object will have a component labeled \texttt{cv.error}. Note that \texttt{gbm.more} will do additional gbm iterations but will not add to the \texttt{cv.error} component. Use \texttt{gbm.perf(...,method="cv")} to obtain the cross validation estimate.

\begin{figure}[ht]
\begin{center}
\includegraphics[width=5in]{oobperf2}
\end{center}
\caption{Out-of-sample predictive performance of four methods of selecting the optimal number of iterations. The vertical axis plots performance relative to the best. The boxplots indicate relative performance across thirteen real datasets from the UCI repository. See \texttt{demo(OOB-reps)}.}
\label{fig:oobperf}
\end{figure}

Figure~\ref{fig:oobperf} compares four methods for estimating the optimal number of iterations across 13 datasets. The boxplots show each method's performance relative to the best method on that dataset. For most datasets the methods perform similarly; however, 5-fold cross validation is consistently the best of them. OOB, using a 33\% test set, and using a 20\% test set all have datasets for which they perform considerably worse than the best method. My recommendation is to use 5- or 10-fold cross validation if you can afford the computing time. Otherwise you may choose among the other options, knowing that OOB is conservative.
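All three estimators can be obtained from a single fit. The sketch below, continuing the hypothetical simulated example, sets \texttt{train.fraction}, \texttt{bag.fraction}, and \texttt{cv.folds} so that \texttt{test}, \texttt{OOB}, and \texttt{cv} are all available; the settings themselves are illustrative assumptions.
\begin{verbatim}
# Refit so that all three iteration estimates exist: train.fraction < 1
# enables "test", bag.fraction > 0 enables "OOB", cv.folds > 1 enables
# "cv". Shuffle rows first in case they are systematically ordered.
d <- d[sample(nrow(d)), ]
fit <- gbm(y ~ x1 + x2 + x3, data = d,
           distribution = "gaussian", n.trees = 3000,
           interaction.depth = 2, shrinkage = 0.01,
           bag.fraction = 0.5, train.fraction = 0.8, cv.folds = 5)

best.test <- gbm.perf(fit, method = "test")  # holdout test set estimate
best.oob  <- gbm.perf(fit, method = "OOB")   # out-of-bag (conservative)
best.cv   <- gbm.perf(fit, method = "cv")    # 5-fold cross validation
\end{verbatim}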
\section{Available distributions}

This section gives some of the mathematical detail for each of the distribution options that gbm offers. The gbm engine written in C++ has access to a C++ class for each of these distributions. Each class contains methods for computing the associated deviance, initial value, the gradient, and the constants to predict in each terminal node.

In the equations shown below, for non-zero offset terms, replace $f(\mathbf{x}_i)$ with $o_i + f(\mathbf{x}_i)$.

\subsection{Gaussian}

\begin{tabular}{ll}
Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i(y_i-f(\mathbf{x}_i))^2$ \\
Initial value & $\displaystyle f(\mathbf{x})=\frac{\sum w_i(y_i-o_i)}{\sum w_i}$ \\
Gradient & $z_i=y_i - f(\mathbf{x}_i)$ \\
Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-f(\mathbf{x}_i))}{\sum w_i}$
\end{tabular}

\subsection{AdaBoost}

\begin{tabular}{ll}
Deviance & $\displaystyle \frac{1}{\sum w_i} \sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\
Initial value & $\displaystyle \frac{1}{2}\log\frac{\sum y_iw_ie^{-o_i}}{\sum (1-y_i)w_ie^{o_i}}$ \\
Gradient & $\displaystyle z_i= -(2y_i-1)\exp(-(2y_i-1)f(\mathbf{x}_i))$ \\
Terminal node estimates & $\displaystyle \frac{\sum (2y_i-1)w_i\exp(-(2y_i-1)f(\mathbf{x}_i))} {\sum w_i\exp(-(2y_i-1)f(\mathbf{x}_i))}$
\end{tabular}

\subsection{Bernoulli}

\begin{tabular}{ll}
Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\log(1+\exp(f(\mathbf{x}_i))))$ \\
Initial value & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i(1-y_i)}$ \\
Gradient & $\displaystyle z_i=y_i-\frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\
Terminal node estimates & $\displaystyle \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ \\
 & where $\displaystyle p_i = \frac{1}{1+\exp(-f(\mathbf{x}_i))}$ \\
\end{tabular}

Notes:
\begin{itemize}
\item For non-zero offset terms, the computation of the initial value requires Newton-Raphson. Initialize $f_0=0$ and iterate $\displaystyle f_0 \leftarrow f_0 + \frac{\sum w_i(y_i-p_i)}{\sum w_ip_i(1-p_i)}$ where $\displaystyle p_i = \frac{1}{1+\exp(-(o_i+f_0))}$.
\end{itemize}

\subsection{Laplace}

\begin{tabular}{ll}
Deviance & $\frac{1}{\sum w_i} \sum w_i|y_i-f(\mathbf{x}_i)|$ \\
Initial value & $\mbox{median}_w(y)$ \\
Gradient & $z_i=\mbox{sign}(y_i-f(\mathbf{x}_i))$ \\
Terminal node estimates & $\mbox{median}_w(z)$
\end{tabular}

Notes:
\begin{itemize}
\item $\mbox{median}_w(y)$ denotes the weighted median, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq m)}{\sum w_i}=\frac{1}{2}$
\item \texttt{gbm()} currently does not implement the weighted median and issues a warning when the user uses weighted data with \texttt{distribution="laplace"}.
\end{itemize}

\subsection{Quantile regression}

Contributed by Brian Kriegler (see \cite{Kriegler:2010}).

\begin{tabular}{ll}
Deviance & $\frac{1}{\sum w_i} \left(\alpha\sum_{y_i>f(\mathbf{x}_i)} w_i(y_i-f(\mathbf{x}_i))\right. +$ \\
 & \hspace{0.5in}$\left.(1-\alpha)\sum_{y_i\leq f(\mathbf{x}_i)} w_i(f(\mathbf{x}_i)-y_i)\right)$ \\
Initial value & $\mathrm{quantile}^{(\alpha)}_w(y)$ \\
Gradient & $z_i=\alpha I(y_i>f(\mathbf{x}_i))-(1-\alpha)I(y_i\leq f(\mathbf{x}_i))$ \\
Terminal node estimates & $\mathrm{quantile}^{(\alpha)}_w(z)$
\end{tabular}

Notes:
\begin{itemize}
\item $\mathrm{quantile}^{(\alpha)}_w(y)$ denotes the weighted quantile, defined as the solution to the equation $\frac{\sum w_iI(y_i\leq q)}{\sum w_i}=\alpha$
\item \texttt{gbm()} currently does not implement the weighted quantile and issues a warning when the user uses weighted data with \texttt{distribution=list(name="quantile")}.
\end{itemize}
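As an illustration, here is a hypothetical fit of the conditional 90th percentile on the simulated data from the previous section; the settings are illustrative assumptions.
\begin{verbatim}
# Hypothetical quantile-regression fit for the conditional 90th percentile.
fit90 <- gbm(y ~ x1 + x2 + x3, data = d,
             distribution = list(name = "quantile", alpha = 0.90),
             n.trees = 3000, interaction.depth = 2,
             shrinkage = 0.01, bag.fraction = 0.5, cv.folds = 5)
best.iter <- gbm.perf(fit90, method = "cv")
p90 <- predict(fit90, newdata = d, n.trees = best.iter)
mean(d$y <= p90)   # should be near 0.90 in-sample
\end{verbatim}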
\subsection{Cox Proportional Hazard}

\begin{tabular}{ll}
Deviance & $-2\sum w_i(\delta_i(f(\mathbf{x}_i)-\log(R_i/w_i)))$\\
Gradient & $\displaystyle z_i=\delta_i - \sum_j \delta_j \frac{w_jI(t_i\geq t_j)e^{f(\mathbf{x}_i)}} {\sum_k w_kI(t_k\geq t_j)e^{f(\mathbf{x}_k)}}$ \\
Initial value & 0 \\
Terminal node estimates & Newton-Raphson algorithm
\end{tabular}

\begin{enumerate}
\item Initialize the terminal node predictions to 0, $\mathgbf{\rho}=0$
\item Let $\displaystyle p_i^{(k)}=\frac{\sum_j I(k(j)=k)I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}} {\sum_j I(t_j\geq t_i)e^{f(\mathbf{x}_i)+\rho_k}}$
\item Let $g_k=\sum w_i\delta_i\left(I(k(i)=k)-p_i^{(k)}\right)$
\item Let $\mathbf{H}$ be a $k\times k$ matrix with entries
\begin{enumerate}
\item diagonal elements $H_{mm}=\sum w_i\delta_i p_i^{(m)}\left(1-p_i^{(m)}\right)$
\item off-diagonal elements $H_{mn}=-\sum w_i\delta_i p_i^{(m)}p_i^{(n)}$
\end{enumerate}
\item Newton-Raphson update $\mathgbf{\rho} \leftarrow \mathgbf{\rho} - \mathbf{H}^{-1}\mathbf{g}$
\item Return to step 2 until convergence
\end{enumerate}

Notes:
\begin{itemize}
\item $t_i$ is the survival time and $\delta_i$ is the death indicator.
\item $R_i$ denotes the hazard for the risk set, $R_i=\sum_{j=1}^N w_jI(t_j\geq t_i)e^{f(\mathbf{x}_i)}$
\item $k(i)$ indexes the terminal node of observation $i$
\item For speed, \texttt{gbm()} does only one step of the Newton-Raphson algorithm rather than iterating to convergence. There is no appreciable loss of accuracy since the next boosting iteration will simply correct for the prior iteration's inadequacy.
\item \texttt{gbm()} initially sorts the data by survival time. Doing this reduces the computation of the risk set from $O(n^2)$ to $O(n)$ at the cost of a single up-front sort on survival time. After the model is fit, the data are then put back in their original order.
\end{itemize}
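A hypothetical censored survival fit is sketched below; \texttt{gbm()} takes a \texttt{survival::Surv} response with \texttt{distribution="coxph"}. The simulated failure and censoring mechanisms, and all settings, are illustrative assumptions.
\begin{verbatim}
# Hypothetical censored survival example: exponential failure times whose
# rate depends on x1, with independent exponential censoring.
library(survival)

set.seed(3)
sdat <- data.frame(x1 = runif(500), x2 = runif(500))
fail <- rexp(500, rate = exp(sdat$x1))
cens <- rexp(500, rate = 0.3)
sdat$time  <- pmin(fail, cens)
sdat$delta <- as.numeric(fail <= cens)   # death indicator

fit.cox <- gbm(Surv(time, delta) ~ x1 + x2, data = sdat,
               distribution = "coxph", n.trees = 1000,
               shrinkage = 0.01, bag.fraction = 0.5)
# Predictions are f(x), on the log relative-hazard scale (no baseline).
\end{verbatim}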
\subsection{Poisson}

\begin{tabular}{ll}
Deviance & $\displaystyle -2\frac{1}{\sum w_i} \sum w_i(y_if(\mathbf{x}_i)-\exp(f(\mathbf{x}_i)))$ \\
Initial value & $\displaystyle f(\mathbf{x})= \log\left(\frac{\sum w_iy_i}{\sum w_ie^{o_i}}\right)$ \\
Gradient & $z_i=y_i - \exp(f(\mathbf{x}_i))$ \\
Terminal node estimates & $\displaystyle \log\frac{\sum w_iy_i}{\sum w_i\exp(f(\mathbf{x}_i))}$
\end{tabular}

The Poisson class includes special safeguards so that the most extreme predicted values are $e^{-19}$ and $e^{+19}$. This behavior is consistent with \texttt{glm()}.

\subsection{Pairwise}

This distribution implements ranking measures following the \emph{LambdaMART} algorithm \cite{Burges:2010}. Instances belong to \emph{groups}; all pairs of items with different labels, belonging to the same group, are used for training. In \emph{Information Retrieval} applications, groups correspond to user queries, and items to (feature vectors of) documents in the associated match set to be ranked.

For consistency with typical usage, our goal is to \emph{maximize} one of the \emph{utility} functions listed below. Consider a group with instances $x_1, \dots, x_n$, ordered such that $f(x_1) \geq f(x_2) \geq \dots \geq f(x_n)$; i.e., the \emph{rank} of $x_i$ is $i$, where smaller ranks are preferable. Let $P$ be the set of all ordered pairs such that $y_i > y_j$.

\begin{enumerate}
\item[{\bf Concordance:}] Fraction of concordant (i.e., correctly ordered) pairs. For the special case of binary labels, this is equivalent to the Area under the ROC Curve.
$$\left\{ \begin{array}{l l}\frac{\|\{(i,j)\in P | f(x_i)>f(x_j)\}\|}{\|P\|} & P \neq \emptyset\\ 0 & \mbox{otherwise.} \end{array}\right. $$
\item[{\bf MRR:}] Mean reciprocal rank of the highest-ranked positive instance (it is assumed $y_i\in\{0,1\}$):
$$\left\{ \begin{array}{l l}\frac{1}{\min\{1 \leq i \leq n |y_i=1\}} & \exists i: \, 1 \leq i \leq n, y_i=1\\ 0 & \mbox{otherwise.}\end{array}\right.$$
\item[{\bf MAP:}] Mean average precision, a generalization of MRR to multiple positive instances:
$$\left\{ \begin{array}{l l} \frac{\sum_{1\leq i\leq n | y_i=1} \|\{1\leq j\leq i |y_j=1\}\|\,/\,i}{\|\{1\leq i\leq n | y_i=1\}\|} & \exists i: \, 1 \leq i \leq n, y_i=1\\ 0 & \mbox{otherwise.}\end{array}\right.$$
\item[{\bf nDCG:}] Normalized discounted cumulative gain:
$$\frac{\sum_{1\leq i\leq n} y_i/\log_2(i+1)}{\sum_{1\leq i\leq n} y'_i/\log_2(i+1)},$$
where $y'_1, \dots, y'_n$ is a reordering of $y_1, \dots, y_n$ with $y'_1 \geq y'_2 \geq \dots \geq y'_n$.
\end{enumerate}

The generalization to multiple (possibly weighted) groups is straightforward. Sometimes a cut-off rank $k$ is given for \emph{MRR} and \emph{nDCG}, in which case we replace the outer index $n$ by $\min(n,k)$.

The initial value for $f(x_i)$ is always zero. We derive the gradient of a cost function whose gradient locally approximates the gradient of the IR measure for a fixed ranking:
\begin{eqnarray*}
\Phi & = & \sum_{(i,j) \in P} \Phi_{ij}\\
& = & \sum_{(i,j) \in P} |\Delta Z_{ij}| \log \left( 1 + e^{-(f(x_i) - f(x_j))}\right),
\end{eqnarray*}
where $|\Delta Z_{ij}|$ is the absolute utility difference when swapping the ranks of $i$ and $j$, while leaving all other instances the same.

Define
\begin{eqnarray*}
\lambda_{ij} & = & \frac{\partial\Phi_{ij}}{\partial f(x_i)}\\
& = & - |\Delta Z_{ij}| \frac{1}{1 + e^{f(x_i) - f(x_j)}}\\
& = & - |\Delta Z_{ij}| \, \rho_{ij},
\end{eqnarray*}
with
$$ \rho_{ij} = - \frac{\lambda_{ij}}{|\Delta Z_{ij}|} = \frac{1}{1 + e^{f(x_i) - f(x_j)}}.$$
For the gradient of $\Phi$ with respect to $f(x_i)$, define
\begin{eqnarray*}
\lambda_i & = & \frac{\partial \Phi}{\partial f(x_i)}\\
& = & \sum_{j|(i,j) \in P} \lambda_{ij} - \sum_{j|(j,i) \in P} \lambda_{ji}\\
& = & - \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij}\\
& & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji}.
\end{eqnarray*}
The second derivative is
\begin{eqnarray*}
\gamma_i & \stackrel{def}{=} & \frac{\partial^2\Phi}{\partial f(x_i)^2}\\
& = & \sum_{j|(i,j) \in P} |\Delta Z_{ij}| \, \rho_{ij} \, (1-\rho_{ij})\\
& & \mbox{} + \sum_{j|(j,i) \in P} |\Delta Z_{ji}| \, \rho_{ji} \, (1-\rho_{ji}).
\end{eqnarray*}
Now consider again all groups with associated weights. For a given terminal node, let $i$ range over all contained instances. Then its estimate is
$$-\frac{\sum_i v_i\lambda_{i}}{\sum_i v_i \gamma_i},$$
where $v_i=w(\mbox{\em group}(i))/\|\{(j,k)\in\mbox{\em group}(i)\}\|.$

In each iteration, instances are reranked according to the preliminary scores $f(x_i)$ to determine the $|\Delta Z_{ij}|$. Note that in order to avoid ranking bias, we break ties by adding a small amount of random noise.
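A hypothetical ranking fit is sketched below. The group structure is passed through the \texttt{group} element of the \texttt{distribution} list and the utility function through \texttt{metric}; the simulated queries, labels, and all settings are illustrative assumptions.
\begin{verbatim}
# Hypothetical LambdaMART-style ranking fit: 100 queries with 10 documents
# each, binary relevance labels, optimizing nDCG within query groups.
set.seed(4)
rd <- data.frame(query = rep(1:100, each = 10),
                 x1 = runif(1000), x2 = runif(1000))
rd$y <- as.numeric(runif(1000) < plogis(2 * rd$x1 - 1))

fit.rank <- gbm(y ~ x1 + x2, data = rd,
                distribution = list(name = "pairwise",
                                    metric = "ndcg",  # or "conc","mrr","map"
                                    group = "query"),
                n.trees = 1000, shrinkage = 0.01)
\end{verbatim}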
\begin{thebibliography}{77} % start the bibliography
\small % put the bibliography in a small font

\bibitem{FreundSchapire:1997} Y. Freund and R.E. Schapire (1997). ``A decision-theoretic generalization of on-line learning and an application to boosting,'' \textit{Journal of Computer and System Sciences} 55(1):119-139.

\bibitem{Friedman:2001} J.H. Friedman (2001). ``Greedy Function Approximation: A Gradient Boosting Machine,'' \textit{Annals of Statistics} 29(5):1189-1232.

\bibitem{Friedman:2002} J.H. Friedman (2002). ``Stochastic Gradient Boosting,'' \textit{Computational Statistics and Data Analysis} 38(4):367-378.

\bibitem{FHT:2000} J.H. Friedman, T. Hastie, R. Tibshirani (2000). ``Additive Logistic Regression: a Statistical View of Boosting,'' \textit{Annals of Statistics} 28(2):337-374.

\bibitem{Kriegler:2010} B. Kriegler and R. Berk (2010). ``Small Area Estimation of the Homeless in Los Angeles, An Application of Cost-Sensitive Stochastic Gradient Boosting,'' \textit{Annals of Applied Statistics} 4(3):1234-1255.

\bibitem{Ridgeway:1999} G. Ridgeway (1999). ``The state of boosting,'' \textit{Computing Science and Statistics} 31:172-181.

\bibitem{Burges:2010} C. Burges (2010). ``From RankNet to LambdaRank to LambdaMART: An Overview,'' \textit{Microsoft Research Technical Report} MSR-TR-2010-82.

\end{thebibliography} % end the bibliography

\end{document}
gbm/inst/doc/shrinkage-v-iterations.eps0000644000176000001440000005332212102666411017703 0ustar ripleyusers%!PS-Adobe-3.0
% [EPS figure data omitted. The file renders the shrinkage-v-iterations figure (Figure fig:shrinkViters): R graphics output plotting squared error (vertical axis, roughly 0.190 to 0.210) against iterations (horizontal axis, 0 to 10,000), with one curve per shrinkage value, including 0.1, 0.05, and 0.01.]
0.87 0.50 l 0.87 0.15 l 0.87 0.79 l 0.87 0.12 l 0.87 0.22 l 0.87 0.60 l 0.87 0.53 l 0.87 0.60 l 0.87 0.59 l 0.92 0.89 l 0.87 1.02 l 0.87 0.35 l 0.87 0.01 l 0.87 0.42 l 0.87 0.52 l 478.19 448.90 lineto 0.87 0.38 l 0.87 -0.21 l 0.87 0.74 l 0.87 0.35 l 0.87 0.37 l 0.87 0.55 l 0.87 0.64 l 0.87 0.14 l 0.87 0.36 l 0.87 0.77 l 0.87 0.33 l 0.87 0.26 l 0.87 1.09 l 0.88 0.15 l 0.87 0.03 l 0.87 0.50 l 0.87 0.25 l 0.87 0.34 l 0.87 -0.12 l 0.91 -0.04 l 0.87 0.25 l 0.87 0.56 l 0.87 0.16 l 0.87 0.29 l 0.87 0.45 l 0.87 0.04 l 0.87 0.16 l 0.87 0.87 l 0.87 0.71 l 0.87 0.87 l 0.87 0.83 l 0.88 0.63 l 0.87 0.21 l 0.87 0.23 l 0.87 0.20 l 0.87 0.08 l 0.87 0.46 l 0.87 -0.02 l 0.87 0.60 l 0.87 0.32 l 0.87 0.83 l 0.87 -0.29 l 0.87 0.79 l 0.87 0.34 l 0.87 0.58 l 0.91 -0.32 l 0.87 -0.11 l 0.87 -0.11 l 0.87 0.96 l 0.88 0.21 l 0.87 0.59 l 0.87 0.37 l 0.87 0.76 l 0.87 0.19 l 0.87 0.53 l 0.87 0.51 l 0.87 0.40 l 0.87 0.33 l 0.87 0.49 l o 18.00 204.94 577.28 636.94 cl 0 0 1 rgb 0.38 setlinewidth [] 0 setdash 1 setlinecap 1 setlinejoin 10.00 setmiterlimit np 169.95 278.38 m 0 0 l o np 169.95 278.38 m 0 8.99 l o 77.04 278.38 547.04 577.90 cl /ps 12 def /Font1 findfont 12 s 0 0 0 rgb 169.95 298.39 (0.005) 1 0 0 t 0 1 1 rgb 0.75 setlinewidth [] 0 setdash 1 setlinecap 1 setlinejoin 10.00 setmiterlimit np 277.47 636.94 m 0.14 -1.03 l 0.88 -6.00 l 0.87 -5.98 l 0.87 -6.15 l 0.87 -5.86 l 0.87 -5.45 l 0.87 -6.00 l 0.87 -5.63 l 0.87 -5.28 l 0.87 -5.66 l 0.87 -5.09 l 0.87 -5.20 l 0.87 -4.82 l 0.87 -4.75 l 0.91 -5.14 l 0.87 -5.00 l 0.87 -4.85 l 0.87 -4.44 l 0.88 -4.82 l 0.87 -4.31 l 0.87 -4.45 l 0.87 -4.48 l 0.87 -4.15 l 0.87 -4.00 l 0.87 -4.48 l 0.87 -3.96 l 0.87 -3.99 l 0.87 -3.78 l 0.87 -3.72 l 0.87 -3.65 l 0.87 -3.75 l 0.87 -3.48 l 0.87 -3.59 l 0.87 -3.03 l 0.87 -3.49 l 0.87 -3.03 l 0.87 -3.15 l 0.87 -3.24 l 0.87 -3.13 l 0.87 -3.11 l 0.92 -3.59 l 0.87 -3.04 l 0.87 -2.87 l 0.87 -2.65 l 0.87 -2.75 l 0.87 -2.57 l 0.87 -2.39 l 0.87 -2.70 l 0.87 -2.83 l 0.87 -2.41 l 0.87 -2.33 l 0.87 -2.53 l 0.87 -2.52 l 0.87 -2.19 l 0.87 -2.49 l 0.87 -2.01 l 0.87 -2.09 l 0.87 -2.13 l 0.87 -2.24 l 0.87 -2.03 l 0.87 -2.16 l 0.87 -1.93 l 0.87 -1.88 l 0.87 -1.85 l 0.88 -1.75 l 0.87 -1.94 l 0.91 -1.67 l 0.87 -1.68 l 0.87 -1.89 l 0.87 -1.69 l 0.87 -1.72 l 0.87 -1.71 l 0.87 -1.75 l 0.87 -1.57 l 0.87 -1.86 l 0.87 -1.42 l 0.87 -1.66 l 0.87 -1.56 l 0.87 -1.41 l 0.87 -1.42 l 0.87 -1.41 l 0.87 -1.45 l 0.88 -1.57 l 0.87 -1.34 l 0.87 -1.42 l 0.87 -1.11 l 0.87 -1.08 l 0.87 -1.16 l 0.87 -1.15 l 0.87 -1.00 l 0.87 -1.20 l 0.87 -1.28 l 0.87 -1.39 l 0.91 -1.23 l 0.87 -1.02 l 0.87 -1.04 l 0.87 -0.94 l 0.87 -0.99 l 0.87 -0.91 l 363.95 353.37 lineto 0.88 -1.01 l 0.87 -0.95 l 0.87 -1.14 l 0.87 -0.89 l 0.87 -1.03 l 0.87 -0.95 l 0.87 -0.75 l 0.87 -0.73 l 0.87 -0.68 l 0.87 -0.78 l 0.87 -0.87 l 0.87 -0.89 l 0.87 -0.89 l 0.87 -0.66 l 0.87 -0.82 l 0.87 -0.77 l 0.87 -0.74 l 0.87 -0.55 l 0.87 -0.76 l 0.92 -0.61 l 0.87 -0.61 l 0.87 -0.55 l 0.87 -0.36 l 0.87 -0.59 l 0.87 -0.62 l 0.87 -0.64 l 0.87 -0.81 l 0.87 -0.67 l 0.87 -0.75 l 0.87 -0.71 l 0.87 -0.49 l 0.87 -0.52 l 0.87 -0.37 l 0.87 -0.51 l 0.87 -0.53 l 0.87 -0.51 l 0.87 -0.42 l 0.87 -0.35 l 0.87 -0.37 l 0.87 -0.60 l 0.87 -0.49 l 0.87 -0.37 l 0.87 -0.50 l 0.87 -0.42 l 0.87 -0.34 l 0.92 -0.41 l 0.87 -0.33 l 0.87 -0.50 l 0.87 -0.57 l 0.87 -0.29 l 0.87 -0.35 l 0.87 -0.42 l 0.87 -0.23 l 0.87 -0.47 l 0.87 -0.25 l 0.87 -0.39 l 0.87 -0.32 l 0.87 -0.30 l 0.87 -0.39 l 0.87 -0.32 l 0.87 -0.34 l 0.87 -0.28 l 0.87 -0.37 l 0.88 -0.30 l 0.87 -0.20 l 0.87 -0.27 l 0.87 -0.27 l 0.87 -0.18 l 0.87 -0.36 l 0.87 -0.24 l 0.87 -0.35 l 0.91 -0.26 l 0.87 
-0.24 l 0.87 -0.35 l 0.87 -0.36 l 0.87 -0.19 l 0.87 -0.23 l 0.87 -0.21 l 0.87 -0.19 l 0.87 -0.18 l 0.87 -0.18 l 0.88 -0.22 l 0.87 -0.28 l 0.87 -0.19 l 0.87 -0.11 l 0.87 -0.05 l 0.87 -0.10 l 0.87 -0.10 l 0.87 -0.14 l 0.87 0.01 l 0.87 -0.12 l 0.87 -0.16 l 0.87 -0.10 l 0.87 -0.05 l 0.87 -0.09 l 0.87 -0.10 l 0.87 -0.06 l 0.87 -0.08 l 0.91 -0.08 l 451.17 310.65 lineto 0.87 -0.14 l 0.87 -0.12 l 0.87 -0.09 l 0.87 -0.06 l 0.87 -0.19 l 0.87 -0.14 l 0.87 -0.01 l 0.87 -0.07 l 0.87 -0.08 l 0.87 -0.12 l 0.87 -0.21 l 0.87 -0.03 l 0.87 0.02 l 0.87 0.06 l 0.87 0.01 l 0.87 -0.09 l 0.87 -0.03 l 0.87 -0.16 l 0.87 -0.09 l 0.87 -0.12 l 0.87 0.06 l 0.87 -0.09 l 0.87 -0.04 l 0.87 -0.06 l 0.92 -0.13 l 0.87 -0.01 l 0.87 0.03 l 0.87 0.02 l 0.87 -0.07 l 0.87 -0.04 l 0.87 -0.01 l 0.87 0.02 l 0.87 0.01 l 0.87 -0.04 l 0.87 -0.06 l 0.87 -0.09 l 0.87 0.15 l 0.87 -0.06 l 0.87 -0.08 l 0.87 0.07 l 0.87 -0.06 l 0.87 0 l 0.87 -0.07 l 0.22 -0.07 l 0.65 0.03 l 0.88 0.17 l 0.87 0.03 l 0.87 0.06 l 0.87 -0.06 l 0.87 -0.05 l 0.87 0.12 l 0.91 -0.14 l 0.87 -0.04 l 0.87 -0.02 l 0.87 0.06 l 0.87 0.08 l 0.87 0.07 l 0.87 0.02 l 0.87 -0.03 l 0.87 0.02 l 0.87 0.02 l 0.87 -0.01 l 0.87 0.05 l 0.88 0.05 l 0.87 0.07 l 0.87 -0.06 l 0.87 -0.01 l 0.87 0 l 0.87 -0.13 l 0.87 0.03 l 0.87 0.01 l 0.87 -0.12 l 0.87 0.10 l 0.87 -0.02 l 0.87 0.08 l 0.87 0.05 l 0.87 0.07 l 0.91 -0.07 l 0.87 -0.02 l 0.87 -0.06 l 0.87 -0.05 l 0.88 0.07 l 0.87 -0.04 l 0.87 0.04 l 0.87 0.14 l 0.87 -0.02 l 0.87 -0.05 l 0.87 -0.02 l 0.87 0.13 l 0.87 0.11 l 0.87 -0.07 l o 18.00 204.94 577.28 636.94 cl 0 1 1 rgb 0.38 setlinewidth [] 0 setdash 1 setlinecap 1 setlinejoin 10.00 setmiterlimit np 488.85 278.38 m 0 0 l o np 488.85 278.38 m 0 8.99 l o 77.04 278.38 547.04 577.90 cl /ps 12 def /Font1 findfont 12 s 0 0 0 rgb 488.85 297.33 (0.001) 1 0 0 t 0.75 setlinewidth [] 0 setdash 1 setlinecap 1 setlinejoin 10.00 setmiterlimit np 77.04 308.37 m 470.00 0 l o ep %%Trailer %%Pages: 1 %%EOF gbm/inst/doc/gbm.pdf0000644000176000001440000053664512143234277014061 0ustar ripleyusers%PDF-1.5 % 1 0 obj << /Type /ObjStm /Length 3884 /Filter /FlateDecode /N 65 /First 516 >> stream x[kW8B&9 e{9<H e& mt7 -ottmI[UdE)&D4biXȌU2G,bC;X&$ ŤF0i C&8FyĔP!fJ:FcTr)-ӂҘ"1/CLGX"fDsaY 4a&!oEP`b:d, s1cFxO0 #J*tY9 18Q[ *YB&`Q !%ebq~o5Va^R4.0{f o  @o -rCH]< "G3.H H1@/Rk_҄E~M(?`x2:IgO\3~>Tv0Z:MYwd{~!pEj Z\Ït0JLӧZݤE[+z"5J_-n#dBzom_+>)<_{>S>KwSf0n7 |LڿlbP>üwOO>m{#f0ݠm,LKh@,d:] &8 Mo1g &ڼK&O.gN$[Qv fSs$~wdǣ~vN/ ֋Iذ(bBo;n 7NPra5a}Tֺ##lOMfxk~C\͐Ƴa:Eͷ,\' O'Է/͋u~>oo={ sS>4n|izC2tvw<^L#~ Sgr\}Mgn˸O*\=]ǚ_wܿ8|O'd$f~$@:Oý7} ڰttzq ,e>JX KAeAJAeXG%W<N rp|Ht9QO N3~ a36L! _)U~"۰fC8owZ^3l=|*xMy/).jxS>LF/t68}gW9Gz^Go+ΫxPQOe%+qJrwod)fuG(qV}gMɷ~4Mg+dBR0>J.IOulÙ ZRn tq򿹬L҉[]Uв9z5~z}>+ɶaȈђiKϯOZd.\JT"p-ǣBk-"sUd&{ROlxl8XfG<|z[Fv݈ϣj>_d ~ǻV^~ݥ\Y0Db6uqq<:YX]vC7F:.跄Ah x}E@7Iva5ZQ95I0Ų&XE'_"T+#e>šu7K1slaZHzFQY)1} 1KǨqiVڈGi:ƥ]RAh3n nqzv C 5Dj!JW2oeĕhk D}˫{IJleR&Ju4uO$JYl:PQG c{Q h-j%F4v,6 <ȏz)sL;0.rmJĚ O!5Xs%5Xk-y6ELc*emڕ[+k#YBAkn5JVkƚepI:?ݥ.)_Vc> hTZqHrn-ɦ}(Bm(-96ٚJ{TGq E~b/^!3bQo2F¡!BB|Ž{= ГzXxP/ LfmJWs%. 
[gbm.pdf: compressed binary object streams of the compiled vignette PDF; the only recoverable plaintext is the embedded-figure reference /PTEX.FileName (./shrinkage-v-iterations.pdf); the dump is truncated mid-stream]
gbm/src/0000755000176000001440000000000013064145661011644 5ustar ripleyusers
gbm/src/locationm.h0000644000176000001440000000157613064145661014003 0ustar ripleyusers
//------------------------------------------------------------------------------
// GBM alteration by Daniel Edwards
// File:        locationm.h
//
// History:     27/3/2008 created
//
//------------------------------------------------------------------------------

#ifndef LOCMCGBM_H
#define LOCMCGBM_H

#include <algorithm>
#include <cstring>
#include <utility>
#include <vector>

using namespace std;

class CLocationM
{
public:

    CLocationM(const char *sType, int iN, double *adParams);

    virtual ~CLocationM();

    double Median(int iN, double *adV, double *adW);

    double PsiFun(double dX);

    double LocationM(int iN, double *adX, double *adW);

private:
    double *madParams;
    const char *msType;
    double mdEps;

    struct comp
    {
        bool operator()(pair<int, double> prP, pair<int, double> prQ)
        {
            return (prP.second < prQ.second);
        }
    };

};

#endif // LOCMCGBM_H
gbm/src/locationm.cpp0000644000176000001440000001152113064145661014335 0ustar ripleyusers
//------------------------------------------------------------------------------
// GBM alteration by Daniel Edwards
// File:        locationm.cpp
//
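// Usage sketch (illustrative only; the parameter value is hypothetical, and
// "tdist" is the one psi-function type implemented below):
//
//     double adParams[1] = { 4.0 };              // e.g. t-distribution parameter
//     CLocationM locM("tdist", 1, adParams);
//     double dMu = locM.LocationM(iN, adX, adW); // weighted location M-estimate
//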
// Purpose: Class to provide methods to calculate the location M-estimates // of a variety of functions // // History: 31/03/2008 created // //------------------------------------------------------------------------------ #include "locationm.h" #include #include // for fmax2 using namespace std; ///////////////////////////////////////////////// // Constructor // // Creates a new instance of this class ///////////////////////////////////////////////// CLocationM::CLocationM(const char *sType, int iN, double *adParams) { int ii; msType = sType; mdEps = 1e-8; madParams = new double[iN]; for (ii = 0; ii < iN; ii++) { madParams[ii] = adParams[ii]; } } ///////////////////////////////////////////////// // Destructor // // Frees any memory from variables in this class ///////////////////////////////////////////////// CLocationM::~CLocationM() { if (madParams != NULL) { delete[] madParams; } } ///////////////////////////////////////////////// // Median // // Function to return the weighted quantile of // a vector of a given length // // Parameters: iN - Length of vector // adV - Vector of doubles // adW - Array of weights // dAlpha - Quantile to calculate (0.5 for median) // // Returns : Weighted quantile ///////////////////////////////////////////////// double CLocationM::Median(int iN, double *adV, double *adW) { // Local variables int ii, iMedIdx; vector vecW; vector< pair > vecV; double dCumSum, dWSum, dMed; // Check the vector size if (iN == 0) { return 0.0; } else if(iN == 1) { return adV[0]; } // Create vectors containing the values and weights vecV.resize(iN); for (ii = 0; ii < iN; ii++) { vecV[ii] = make_pair(ii, adV[ii]); } // Sort the vector std::stable_sort(vecV.begin(), vecV.end(), comp()); // Sort the weights correspondingly and calculate their sum vecW.resize(iN); dWSum = 0.0; for (ii = 0; ii < iN; ii++) { vecW[ii] = adW[vecV[ii].first]; dWSum += adW[ii]; } // Get the first index where the cumulative weight is >=0.5 iMedIdx = -1; dCumSum = 0.0; while (dCumSum < 0.5 * dWSum) { iMedIdx ++; dCumSum += vecW[iMedIdx]; } // Get the index of the next non-zero weight int iNextNonZero = iN; for (ii = (iN - 1); ii > iMedIdx; ii--) { if (vecW[ii] > 0) { iNextNonZero = ii; } } // Use this index unless the cumulative sum is exactly alpha if (iNextNonZero == iN || dCumSum > 0.5 * dWSum) { dMed = vecV[iMedIdx].second; } else { dMed = 0.5 * (vecV[iMedIdx].second + vecV[iNextNonZero].second); } return dMed; } ///////////////////////////////////////////////// // PsiFun // // Function to calculate the psi of the supplied // value, given the type of function to use and // the supplied parameters // // Parameters: dX - Value // // Returns : Psi(X) ///////////////////////////////////////////////// double CLocationM::PsiFun(double dX) { // Local variables double dPsiVal = 0.0; // Switch on the type of function if(strncmp(msType,"tdist",2) == 0) { dPsiVal = dX / (madParams[0] + (dX * dX)); } else { // TODO: Handle the error Rprintf("Error: Function type %s not found\n", msType); } return dPsiVal; } ///////////////////////////////////////////////// // LocationM // // Function to calculate location M estimate for // the supplied weighted data, with the psi-function // type and parameters specified in this class // // Parameters: iN - Number of data points // adX - Data vector // adW - Weight vector // // Returns : Location M-Estimate of (X, W) ///////////////////////////////////////////////// double CLocationM::LocationM(int iN, double *adX, double *adW) { // Local variables int ii; // Get the initial 
estimate of location double dBeta0 = Median(iN, adX, adW); // Get the initial estimate of scale double *adDiff = new double[iN]; for (ii = 0; ii < iN; ii++) { adDiff[ii] = fabs(adX[ii] - dBeta0); } double dScale0 = 1.4826 * Median(iN, adDiff, adW); dScale0 = fmax2(dScale0, mdEps); // Loop over until the error is low enough double dErr = 1.0; int iCount = 0; while (iCount < 50) { double dSumWX = 0.0; double dSumW = 0.0; for (ii = 0; ii < iN; ii++) { double dT = fabs(adX[ii] - dBeta0) / dScale0; dT = fmax2(dT, mdEps); double dWt = adW[ii] * PsiFun(dT) / dT; dSumWX += dWt * adX[ii]; dSumW += dWt; } double dBeta = dBeta0; if (dSumW > 0){ dBeta = dSumWX / dSumW; } dErr = fabs(dBeta - dBeta0); if (dErr > mdEps) { dErr /= fabs(dBeta0); } dBeta0 = dBeta; if (dErr < mdEps) { iCount = 100; } else { iCount++; } } // Cleanup memory delete[] adDiff; return dBeta0; } gbm/src/gbm_engine.h0000644000176000001440000000622613064145661014115 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: gbm_engine.h // // License: GNU GPL (version 2 or later) // // Contents: Generalized boosted model engine // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef GBM_ENGINGBM_H #define GBM_ENGINGBM_H #include #include "buildinfo.h" #include "distribution.h" #include "tree.h" #include "dataset.h" #include "node_factory.h" using namespace std; class CGBM { public: CGBM(); ~CGBM(); GBMRESULT Initialize(CDataset *pData, CDistribution *pDist, double dLambda, unsigned long nTrain, double dBagFraction, unsigned long cLeaves, unsigned long cMinObsInNode, unsigned long cNumClasses, int cGroups); GBMRESULT iterate(double *adF, double &dTrainError, double &dValidError, double &dOOBagImprove, int &cNodes, int cNumClasses, int cClassIdx); GBMRESULT TransferTreeToRList(int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld); GBMRESULT Predict(unsigned long iVar, unsigned long cTrees, double *adF, double *adX, unsigned long cLength); GBMRESULT Predict(double *adX, unsigned long cRow, unsigned long cCol, unsigned long cTrees, double *adF); GBMRESULT GetVarRelativeInfluence(double *adRelInf, unsigned long cTrees); GBMRESULT PrintTree(); bool IsPairwise() const { return (cGroups >= 0); } CDataset *pData; // the data CDistribution *pDist; // the distribution bool fInitialized; // indicates whether the GBM has been initialized CNodeFactory *pNodeFactory; // these objects are for the tree growing // allocate them once here for all trees to use bool *afInBag; unsigned long *aiNodeAssign; CNodeSearch *aNodeSearch; PCCARTTree ptreeTemp; VEC_P_NODETERMINAL vecpTermNodes; double *adZ; double *adFadj; private: double dLambda; unsigned long cTrain; unsigned long cValid; unsigned long cTotalInBag; double dBagFraction; unsigned long cDepth; unsigned long cMinObsInNode; int cGroups; }; #endif // GBM_ENGINGBM_H gbm/src/laplace.h0000644000176000001440000000572213064145661013424 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // File: laplace.h // // License: GNU GPL (version 2 or later) // // Contents: laplace object // // Owner: gregr@rand.org // // History: 
3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef LAPLACGBM_H #define LAPLACGBM_H #include #include "distribution.h" #include "locationm.h" class CLaplace : public CDistribution { public: CLaplace(); virtual ~CLaplace(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector vecd; vector::iterator itMedian; CLocationM *mpLocM; }; #endif // LAPLACGBM_H gbm/src/tree.cpp0000644000176000001440000002666013064145661013321 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "tree.h" CCARTTree::CCARTTree() { pRootNode = NULL; pNodeFactory = NULL; dShrink = 1.0; } CCARTTree::~CCARTTree() { if(pRootNode != NULL) { pRootNode->RecycleSelf(pNodeFactory); } } GBMRESULT CCARTTree::Initialize ( CNodeFactory *pNodeFactory ) { GBMRESULT hr = GBM_OK; this->pNodeFactory = pNodeFactory; return hr; } GBMRESULT CCARTTree::Reset() { GBMRESULT hr = GBM_OK; if(pRootNode != NULL) { // delete the old tree and start over hr = pRootNode->RecycleSelf(pNodeFactory); } if(GBM_FAILED(hr)) { goto Error; } iBestNode = 0; dBestNodeImprovement = 0.0; schWhichNode = 0; pNewSplitNode = NULL; pNewLeftNode = NULL; pNewRightNode = NULL; pNewMissingNode = NULL; pInitialRootNode = NULL; Cleanup: return hr; Error: goto Cleanup; } //------------------------------------------------------------------------------ // Grows a regression tree //------------------------------------------------------------------------------ GBMRESULT CCARTTree::grow ( double *adZ, CDataset *pData, double *adW, double *adF, unsigned long nTrain, unsigned long nBagged, double dLambda, unsigned long cMaxDepth, unsigned long cMinObsInNode, bool *afInBag, unsigned long *aiNodeAssign, CNodeSearch *aNodeSearch, VEC_P_NODETERMINAL &vecpTermNodes ) { GBMRESULT hr = GBM_OK; #ifdef NOISY_DEBUG Rprintf("Growing tree\n"); #endif if((adZ==NULL) || (pData==NULL) || (adW==NULL) || (adF==NULL) || (cMaxDepth < 1)) { hr = GBM_INVALIDARG; goto Error; } dSumZ = 0.0; dSumZ2 = 0.0; dTotalW = 0.0; #ifdef NOISY_DEBUG Rprintf("initial tree calcs\n"); #endif for(iObs=0; iObsGetNewNodeTerminal(); pInitialRootNode->dPrediction = dSumZ/dTotalW; pInitialRootNode->dTrainW = dTotalW; vecpTermNodes.resize(2*cMaxDepth + 1,NULL); // accounts for missing nodes vecpTermNodes[0] = pInitialRootNode; pRootNode = pInitialRootNode; aNodeSearch[0].Set(dSumZ,dTotalW,nBagged, pInitialRootNode, &pRootNode, pNodeFactory); // build the tree structure #ifdef NOISY_DEBUG Rprintf("Building tree 1 "); #endif cTotalNodeCount = 1; 
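// Note (bookkeeping sketch, inferred from the buffer sizing above): each
// accepted split replaces one terminal node by a split node with three
// children (left/right/missing), so every split adds 3 nodes overall and
// 2 terminal nodes; a tree grown to depth cMaxDepth therefore has at most
// 2*cMaxDepth + 1 terminal nodes, matching vecpTermNodes.resize(2*cMaxDepth + 1, NULL).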
cTerminalNodes = 1; for(cDepth=0; cDepthWhichNode(pData,iObs); if(schWhichNode == 1) // goes right { aiNodeAssign[iObs] = cTerminalNodes-2; } else if(schWhichNode == 0) // is missing { aiNodeAssign[iObs] = cTerminalNodes-1; } // those to the left stay with the same node assignment } } // set up the node search for the new right node aNodeSearch[cTerminalNodes-2].Set(aNodeSearch[iBestNode].dBestRightSumZ, aNodeSearch[iBestNode].dBestRightTotalW, aNodeSearch[iBestNode].cBestRightN, pNewRightNode, &(pNewSplitNode->pRightNode), pNodeFactory); // set up the node search for the new missing node aNodeSearch[cTerminalNodes-1].Set(aNodeSearch[iBestNode].dBestMissingSumZ, aNodeSearch[iBestNode].dBestMissingTotalW, aNodeSearch[iBestNode].cBestMissingN, pNewMissingNode, &(pNewSplitNode->pMissingNode), pNodeFactory); // set up the node search for the new left node // must be done second since we need info for right node first aNodeSearch[iBestNode].Set(aNodeSearch[iBestNode].dBestLeftSumZ, aNodeSearch[iBestNode].dBestLeftTotalW, aNodeSearch[iBestNode].cBestLeftN, pNewLeftNode, &(pNewSplitNode->pLeftNode), pNodeFactory); } // end tree growing // DEBUG // Print(); Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CCARTTree::GetBestSplit ( CDataset *pData, unsigned long nTrain, CNodeSearch *aNodeSearch, unsigned long cTerminalNodes, unsigned long *aiNodeAssign, bool *afInBag, double *adZ, double *adW, unsigned long &iBestNode, double &dBestNodeImprovement ) { GBMRESULT hr = GBM_OK; int iVar = 0; unsigned long iNode = 0; unsigned long iOrderObs = 0; unsigned long iWhichObs = 0; unsigned long cVarClasses = 0; double dX = 0.0; for(iVar=0; iVar < pData->cCols; iVar++) { cVarClasses = pData->acVarClasses[iVar]; for(iNode=0; iNode < cTerminalNodes; iNode++) { hr = aNodeSearch[iNode].ResetForNewVar(iVar,cVarClasses); } // distribute the observations in order to the correct node search for(iOrderObs=0; iOrderObs < nTrain; iOrderObs++) { iWhichObs = pData->aiXOrder[iVar*nTrain + iOrderObs]; if(afInBag[iWhichObs]) { iNode = aiNodeAssign[iWhichObs]; dX = pData->adX[iVar*(pData->cRows) + iWhichObs]; hr = aNodeSearch[iNode].IncorporateObs (dX, adZ[iWhichObs], adW[iWhichObs], pData->alMonotoneVar[iVar]); if(GBM_FAILED(hr)) { goto Error; } } } for(iNode=0; iNode dBestNodeImprovement) { iBestNode = iNode; dBestNodeImprovement = aNodeSearch[iNode].BestImprovement(); } } Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CCARTTree::GetNodeCount ( int &cNodes ) { cNodes = cTotalNodeCount; return GBM_OK; } GBMRESULT CCARTTree::PredictValid ( CDataset *pData, unsigned long nValid, double *adFadj ) { GBMRESULT hr = GBM_OK; int i=0; for(i=pData->cRows - nValid; icRows; i++) { pRootNode->Predict(pData, i, adFadj[i]); adFadj[i] *= dShrink; } return hr; } GBMRESULT CCARTTree::Predict ( double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow, double &dFadj ) { if(pRootNode != NULL) { pRootNode->Predict(adX,cRow,cCol,iRow,dFadj); dFadj *= dShrink; } else { dFadj = 0.0; } return GBM_OK; } GBMRESULT CCARTTree::Adjust ( unsigned long *aiNodeAssign, double *adFadj, unsigned long cTrain, VEC_P_NODETERMINAL &vecpTermNodes, unsigned long cMinObsInNode ) { unsigned long hr = GBM_OK; unsigned long iObs = 0; hr = pRootNode->Adjust(cMinObsInNode); if(GBM_FAILED(hr)) { goto Error; } // predict for the training observations for(iObs=0; iObsdPrediction; } Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CCARTTree::Print() { GBMRESULT hr = GBM_OK; if(pRootNode != NULL) { pRootNode->PrintSubtree(0); Rprintf("shrinkage: 
%f\n",dShrink); Rprintf("initial error: %f\n\n",dError); } return hr; } GBMRESULT CCARTTree::GetVarRelativeInfluence ( double *adRelInf ) { GBMRESULT hr = GBM_OK; if(pRootNode != NULL) { hr = pRootNode->GetVarRelativeInfluence(adRelInf); if(GBM_FAILED(hr)) { goto Error; } } Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CCARTTree::TransferTreeToRList ( CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage ) { GBMRESULT hr = GBM_OK; int iNodeID = 0; if(pRootNode != NULL) { hr = pRootNode->TransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); } else { hr = GBM_FAIL; } return hr; } gbm/src/gbm_engine.cpp0000644000176000001440000003063213064145661014446 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 //#define NOISY_DEBUG #include "gbm_engine.h" CGBM::CGBM() { adFadj = NULL; adZ = NULL; afInBag = NULL; aiNodeAssign = NULL; aNodeSearch = NULL; cDepth = 0; cMinObsInNode = 0; dBagFraction = 0.0; dLambda = 0.0; fInitialized = false; cTotalInBag = 0; cTrain = 0; cValid = 0; pData = NULL; pDist = NULL; pNodeFactory = NULL; ptreeTemp = NULL; } CGBM::~CGBM() { if(adFadj != NULL) { delete [] adFadj; adFadj = NULL; } if(adZ != NULL) { delete [] adZ; adZ = NULL; } if(afInBag != NULL) { delete [] afInBag; afInBag = NULL; } if(aiNodeAssign != NULL) { delete [] aiNodeAssign; aiNodeAssign = NULL; } if(aNodeSearch != NULL) { delete [] aNodeSearch; aNodeSearch = NULL; } if(ptreeTemp != NULL) { delete ptreeTemp; ptreeTemp = NULL; } // must delete the node factory last!!! 
at least after deleting trees if(pNodeFactory != NULL) { delete pNodeFactory; pNodeFactory = NULL; } } GBMRESULT CGBM::Initialize ( CDataset *pData, CDistribution *pDist, double dLambda, unsigned long cTrain, double dBagFraction, unsigned long cDepth, unsigned long cMinObsInNode, unsigned long cNumClasses, int cGroups ) { GBMRESULT hr = GBM_OK; unsigned long i=0; if(pData == NULL) { hr = GBM_INVALIDARG; goto Error; } if(pDist == NULL) { hr = GBM_INVALIDARG; goto Error; } this->pData = pData; this->pDist = pDist; this->dLambda = dLambda; this->cTrain = cTrain; this->dBagFraction = dBagFraction; this->cDepth = cDepth; this->cMinObsInNode = cMinObsInNode; this->cGroups = cGroups; // allocate the tree structure ptreeTemp = new CCARTTree; if(ptreeTemp == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } cValid = pData->cRows - cTrain; cTotalInBag = (unsigned long)(dBagFraction*cTrain); adZ = new double[(pData->cRows) * cNumClasses]; if(adZ == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } adFadj = new double[(pData->cRows) * cNumClasses]; if(adFadj == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } for (i=0; i<(pData->cRows)*cNumClasses; i++) { adFadj[i] = 0.0; } pNodeFactory = new CNodeFactory(); if(pNodeFactory == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } hr = pNodeFactory->Initialize(cDepth); if(GBM_FAILED(hr)) { goto Error; } ptreeTemp->Initialize(pNodeFactory); // array for flagging those observations in the bag afInBag = new bool[cTrain]; if(afInBag==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // aiNodeAssign tracks to which node each training obs belongs aiNodeAssign = new ULONG[cTrain]; if(aiNodeAssign==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // NodeSearch objects help decide which nodes to split aNodeSearch = new CNodeSearch[2*cDepth+1]; if(aNodeSearch==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } for(i=0; i<2*cDepth+1; i++) { aNodeSearch[i].Initialize(cMinObsInNode); } vecpTermNodes.resize(2*cDepth+1,NULL); fInitialized = true; Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CGBM::Predict ( unsigned long iVar, unsigned long cTrees, double *adF, double *adX, unsigned long cLength ) { GBMRESULT hr = GBM_OK; return hr; } GBMRESULT CGBM::Predict ( double *adX, unsigned long cRow, unsigned long cCol, unsigned long cTrees, double *adF ) { GBMRESULT hr = GBM_OK; return hr; } GBMRESULT CGBM::GetVarRelativeInfluence ( double *adRelInf, unsigned long cTrees ) { GBMRESULT hr = GBM_OK; int iVar=0; for(iVar=0; iVarcCols; iVar++) { adRelInf[iVar] = 0.0; } return hr; } GBMRESULT CGBM::PrintTree() { GBMRESULT hr = GBM_OK; hr = ptreeTemp->Print(); if(GBM_FAILED(hr)) goto Error; Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CGBM::iterate ( double *adF, double &dTrainError, double &dValidError, double &dOOBagImprove, int &cNodes, int cNumClasses, int cClassIdx ) { GBMRESULT hr = GBM_OK; unsigned long i = 0; unsigned long cBagged = 0; int cIdxOff = cClassIdx * (cTrain + cValid); // for(i=0; i < cTrain + cIdxOff; i++){ adF[i] = 0;} if(!fInitialized) { hr = GBM_FAIL; goto Error; } dTrainError = 0.0; dValidError = 0.0; dOOBagImprove = 0.0; vecpTermNodes.assign(2*cDepth+1,NULL); // randomly assign observations to the Bag if (cClassIdx == 0) { if (!IsPairwise()) { // regular instance based training for(i=0; i= cTotalInBag){ break; } */ } // the remainder is not in the bag for( ; iadMisc[i]; if (dGroup != dLastGroup) { if (cBaggedGroups >= cTotalGroupsInBag) { break; } // Group changed, make a new decision chosen = (unif_rand()*(cGroups - cSeenGroups) < cTotalGroupsInBag - cBaggedGroups); if (chosen) { 
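// A newly seen group was sampled into the bag: count it here; each
// of the group's items is flagged in-bag further down in this loop.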
cBaggedGroups++; } dLastGroup = dGroup; cSeenGroups++; } if (chosen) { afInBag[i] = true; cBagged++; } else { afInBag[i] = false; } } // the remainder is not in the bag for( ; iComputeWorkingResponse(pData->adY, pData->adMisc, pData->adOffset, adF, adZ, pData->adWeight, afInBag, cTrain, cIdxOff); if(GBM_FAILED(hr)) { goto Error; } #ifdef NOISY_DEBUG Rprintf("Reset tree\n"); #endif hr = ptreeTemp->Reset(); #ifdef NOISY_DEBUG Rprintf("grow tree\n"); #endif hr = ptreeTemp->grow(&(adZ[cIdxOff]), pData, &(pData->adWeight[cIdxOff]), &(adFadj[cIdxOff]), cTrain, cTotalInBag, dLambda, cDepth, cMinObsInNode, afInBag, aiNodeAssign, aNodeSearch, vecpTermNodes); if(GBM_FAILED(hr)) { goto Error; } #ifdef NOISY_DEBUG Rprintf("get node count\n"); #endif hr = ptreeTemp->GetNodeCount(cNodes); if(GBM_FAILED(hr)) { goto Error; } // Now I have adF, adZ, and vecpTermNodes (new node assignments) // Fit the best constant within each terminal node #ifdef NOISY_DEBUG Rprintf("fit best constant\n"); #endif hr = pDist->FitBestConstant(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, adF, adZ, aiNodeAssign, cTrain, vecpTermNodes, (2*cNodes+1)/3, // number of terminal nodes cMinObsInNode, afInBag, adFadj, cIdxOff); if(GBM_FAILED(hr)) { goto Error; } // update training predictions // fill in missing nodes where N < cMinObsInNode hr = ptreeTemp->Adjust(aiNodeAssign,&(adFadj[cIdxOff]),cTrain, vecpTermNodes,cMinObsInNode); if(GBM_FAILED(hr)) { goto Error; } ptreeTemp->SetShrinkage(dLambda); if (cClassIdx == (cNumClasses - 1)) { dOOBagImprove = pDist->BagImprovement(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, adF, adFadj, afInBag, dLambda, cTrain); } // update the training predictions for(i=0; i < cTrain; i++) { int iIdx = i + cIdxOff; adF[iIdx] += dLambda * adFadj[iIdx]; } dTrainError = pDist->Deviance(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, adF, cTrain, cIdxOff); // update the validation predictions hr = ptreeTemp->PredictValid(pData,cValid,&(adFadj[cIdxOff])); for(i=cTrain; i < cTrain+cValid; i++) { adF[i + cIdxOff] += adFadj[i + cIdxOff]; } if(pData->fHasOffset) { dValidError = pDist->Deviance(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, adF, cValid, cIdxOff + cTrain); } else { dValidError = pDist->Deviance(pData->adY, pData->adMisc, NULL, pData->adWeight, adF, cValid, cIdxOff + cTrain); } Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CGBM::TransferTreeToRList ( int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld ) { GBMRESULT hr = GBM_OK; hr = ptreeTemp->TransferTreeToRList(pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dLambda); return hr; } gbm/src/gaussian.h0000644000176000001440000000547513064145661013642 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: gaussian.h // // License: GNU GPL (version 2 or later) // // Contents: gaussian object // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef GAUSSIAN_H #define GAUSSIAN_H #include "distribution.h" class CGaussian : public CDistribution { public: CGaussian(); virtual ~CGaussian(); GBMRESULT 
UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adZ, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); }; #endif // GAUSSIAN_H gbm/src/pairwise.h0000644000176000001440000003254113064145661013645 0ustar ripleyusers//--------------------------------------------------------------------------------- // GBM alteration by Stefan Schroedl (schroedl@a9.com) // // File: pairwise // // Contains: Distribution object to implement pairwise distributions for ranking // // History: 12/15/2011 Created // //--------------------------------------------------------------------------------- // This file implements the LambdaMart algorithm for learning ranking functions. // The main idea is to model p_ij, the probability that item i should rank higher // than j, as // p_ij = 1 / (1 + exp(s_i - s_j)), // where s_i, s_j are the model scores for the two items. // // While scores are still generated one item at a time, gradients for learning // depend on _pairs_ of items. The algorithm is aware of _groups_; all pairs of items // with different labels, belonging to the same group, are used for training. A // typical application is ranking for web search: groups correspond to user queries, // and items to (feature vectors of) web pages in the associated match set. // // Different IR measures can be chosen, to weight instances based on their rank. // Generally, changes in top ranks should have more influence than changes at the // bottom of the result list. This function provides the following options: // // * CONC (concordance index, fraction of correctly raked pairs. This is a generalization // of Area under the ROC Curve (AUC) from binary to multivalued labels. // * Normalized Discounted Cumulative Gain (NDCG) // * Mean Reciprocal Rank (MRR) of the highest-ranked positive instance. // * Mean Average Precision (MAP), a generalization of MRR to multiple positive instances. // // While MRR and MAP expect binary target labels, CONC and NDCG can equally work with // continuous values. More precisely, NDCG is defined as // \Sum_{r=1..n} val_r / log2(r+1), // where val_r is the user-specified target for the item at rank r. Note that this is // contrast to some definitions of NDCG that assume integer targets s_i, and // implicitly transform val_r = 2^{s+i}-1. // // Groups are specified using an integer vector of the same length as the training instances. // // Optionally, item weights can be supplied; it is assumed that all instances belonging // to the same group have the same weight. // // For background information on LambdaMart, please see e.g. 
the following papers: // // * Burges, C., "From RankNet to LambdaRank to LambdaMART: An Overview", Microsoft // Research Technical Report MSR-TR-2010-82, 2010 // * Donmez, P., K. Svore, K., and Burges, C., "On the Local Optimality of // LambdaRank", SIGIR 2009 // * Burges, C., Ragno, R., and Le, Q., "Learning to Rank with Non-Smooth Cost // Functions", NIPS 2006 #ifndef PAIRWISE_H #define PAIRWISE_H #include "distribution.h" #include "buildinfo.h" // A class to rerank groups based on (intermediate) scores // Note: Smaller ranks are better, the top rank is 1 class CRanker { public: // Auxiliary structure to store score and rank typedef std::pair CDoubleUintPair; // Buffer memory allocation void Init(unsigned int cMaxItemsPerGroup); // Initialize ranker with scores of items belonging to the same group // - adScores is a score array, (at least) cNumItems long bool SetGroupScores(const double* const adScores, unsigned int cNumItems); // Perform the ranking // - Return true if any item changed its rank bool Rank(); // Getter / setter unsigned int GetNumItems() const { return cNumItems; } unsigned int GetRank(int i) const { return vecdipScoreRank[i].second; } unsigned int GetItem(unsigned int iRank) const { return (vecpdipScoreRank[iRank-1] - &(vecdipScoreRank[0])); } void SetRank(int i, unsigned int r) { vecdipScoreRank[i].second = r; } void AddToScore(int i, double delta) { vecdipScoreRank[i].first += delta; } protected: // Number of items in current group unsigned int cNumItems; // Pairs of (score, rank) for current group vector vecdipScoreRank; // Array of pointers to elements of vecdipScoreRank, used for sorting // Note: We need a separate array for sorting in order to be able to // quickly look up the rank for any given item. vector vecpdipScoreRank; }; // Abstract base class for all IR Measures class CIRMeasure { public: // Constructor CIRMeasure() : cRankCutoff(UINT_MAX) {} // Destructor virtual ~CIRMeasure() { } // Getter / Setter unsigned int GetCutoffRank() const { return cRankCutoff; } void SetCutoffRank(unsigned int cRankCutoff) { this->cRankCutoff = cRankCutoff; } // Auxiliary function for sanity check bool AnyPairs(const double* const adY, unsigned int cNumItems) const { return (cNumItems >= 2 // at least two instances && adY[0] > 0.0 // at least one positive example (targets are non-increasing) && adY[cNumItems-1] != adY[0]); // at least two different targets } // Memory allocation virtual void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX) { this->cRankCutoff = cRankCutoff; } // Calculate the IR measure for the group of items set in the ranker. // Precondition: CRanker::SetGroupScores() has been called // - adY are the target scores virtual double Measure(const double* const adY, const CRanker& ranker) = 0; // Calculate the maximum achievable IR measure for a given group. // Side effect: the ranker state might change // Default implementation for MRR and MAP: if any positive items exist, // ranking them at the top yields a perfect measure of 1. virtual double MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems) { return (AnyPairs(adY, cNumItems) ? 1.0 : 0.0); } // Calculate the difference in the IR measure caused by swapping the ranks of two items. // Assumptions: // * iItemBetter has a higher label than iItemWorse (i.e., adY[iItemBetter] > adY[iItemWorse]). // * ranker.setGroup() has been called. 
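// E.g., for binary 0/1 labels under the concordance measure this reduces to
// rank(iItemBetter) - rank(iItemWorse); see the note in CConc::SwapCost in
// pairwise.cpp.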
virtual double SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const = 0; protected: // Cut-off rank below which items are ignored for measure unsigned int cRankCutoff; }; // Class to implement IR Measure 'CONC' (fraction of concordant pairs). For the case of binary labels, this is // equivalent to the area under the ROC curve (AUC). class CConc : public CIRMeasure { public: virtual ~CConc() { } void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX); double Measure(const double* const adY, const CRanker& ranker); // The maximum number of correctly classified pairs is simply all pairs with different labels double MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems) { return PairCount(iGroup, adY, cNumItems); } // (Cached) calculation of the number of pairs with different labels unsigned int PairCount(unsigned int iGroup, const double* const adY, unsigned int cNumItems); double SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const; protected: // Calculate the number of pairs with different labels int ComputePairCount(const double* const adY, unsigned int cNumItems); // Caches the number of pairs with different labels, for each group vector veccPairCount; }; // Class to implement IR Measure 'Normalized Discounted Cumulative Gain' // Note: Labels can have any non-negative value class CNDCG : public CIRMeasure { public: void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX); // Compute DCG double Measure(const double* const adY, const CRanker& ranker); // Compute best possible DCG double MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems); double SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const; protected: // Lookup table for rank weight (w(rank) = 1/log2(1+rank)) vector vecdRankWeight; // Caches the maximum achievable DCG, for each group vector vecdMaxDCG; }; // Class to implement IR Measure 'Mean Reciprocal Rank' // Assumption: Labels are 0 or 1 class CMRR : public CIRMeasure { public: double Measure(const double* const adY, const CRanker& ranker); double SwapCost(int iItemPos, int iItemNeg, const double* const adY, const CRanker& ranker) const; }; // Class to implement IR Measure 'Mean Average Precision' // Assumption: Labels are 0 or 1 class CMAP : public CIRMeasure { public: void Init(unsigned long cMaxGroup, unsigned long cNumItems, unsigned int cRankCutoff = UINT_MAX); double Measure(const double* const adY, const CRanker& ranker); double SwapCost(int iItemPos, int iItemNeg, const double* const adY, const CRanker& ranker) const; protected: // Buffer to hold positions of positive examples mutable vector veccRankPos; }; // Main class for 'pairwise' distribution // Notes and Assumptions: // * The items are sorted such that // * Instances belonging to the same group occur in // a contiguous range // * Within a group, labels are non-increasing. // * adGroup supplies the group ID (positive integer, but double // format for compliance with the base class interface). // * The targets adY are non-negative values, and binary {0,1} // for measures MRR and MAP. // * Higher IR measures are better. // * Only pairs with different labels are used for training. // * Instance weights (adWeight) are constant among groups. 
// * CPairwise::Initialize() is called before any of the other // functions, with same values for adY, adGroup, adWeight, and // nTrain. Certain values have to be precomputed for // efficiency. class CPairwise : public CDistribution { public: // Constructor: determine IR measure as either "conc", "map", "mrr", or "ndcg" CPairwise(const char* szIRMeasure); virtual ~CPairwise(); GBMRESULT Initialize(double *adY, double *adGroup, double *adOffset, double *adWeight, unsigned long cLength); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adGroup, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); double Deviance(double *adY, double *adGroup, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); GBMRESULT InitF(double *adY, double *adGroup, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adGroup, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double BagImprovement(double *adY, double *adGroup, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); protected: // Calculate and accumulate up the gradients and Hessians from all training pairs void ComputeLambdas(int iGroup, unsigned int cNumItems, const double* const adY, const double* const adF, const double* const adWeight, double* adZ, double* adDeriv); CIRMeasure* pirm; // The IR measure to use CRanker ranker; // The ranker vector vecdHessian; // Second derivative of loss function, for each training instance; used for Newton step vector vecdNum; // Buffer used for numerator in FitBestConstant(), for each node vector vecdDenom; // Buffer used for denominator in FitBestConstant(), for each node vector vecdFPlusOffset; // Temporary buffer for (adF + adOffset), if the latter is not null }; #endif // PAIRWISE_H gbm/src/adaboost.h0000644000176000001440000000555613064145661013624 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: adaboost.h // // License: GNU GPL (version 2 or later) // // Contents: Object for fitting for the AdaBoost loss function // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef ADABOOST_H #define ADABOOST_H #include "distribution.h" class CAdaBoost : public CDistribution { public: CAdaBoost(); virtual ~CAdaBoost(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adZ, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, 
unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector vecdNum; vector vecdDen; }; #endif // ADABOOST_H gbm/src/pairwise.cpp0000644000176000001440000007344713064145661014212 0ustar ripleyusers// Implementation file for 'pairwise' distribution // // Author: Stefan Schroedl (schroedl@a9.com) #include "pairwise.h" #include #include #include #include //#define NOISY_DEBUG #ifdef NOISY_DEBUG #endif void CRanker::Init(unsigned int cMaxItemsPerGroup) { // Allocate sorting buffers vecdipScoreRank.resize(cMaxItemsPerGroup); vecpdipScoreRank.resize(cMaxItemsPerGroup); } bool CRanker::SetGroupScores(const double* const adScores, const unsigned int cNumItems) { const double dEPS = 1e-10; if (cNumItems > vecdipScoreRank.size()) { // Allocate additional space // (We should never get here if CPairwise::Initialize has been called before, as expected) Init(cNumItems); } this->cNumItems = cNumItems; // Copy scores to buffer, and // initialize pointer array to score entries for(unsigned int i = 0; i < cNumItems; i++) { // Add small random number to break possible ties vecdipScoreRank[i].first = adScores[i] + dEPS * (unif_rand() - 0.5); vecpdipScoreRank[i] = &(vecdipScoreRank[i]); } return true; } // Auxiliary struct to compare pair pointers // decreasing order based on the first component (score) struct CDoubleUintPairPtrComparison { bool operator() (const CRanker::CDoubleUintPair* lhs, const CRanker::CDoubleUintPair* rhs) { return (lhs->first > rhs->first); } }; bool CRanker::Rank() { // Sort the pointer array, based on decreasing score CDoubleUintPairPtrComparison comp; sort(vecpdipScoreRank.begin(), vecpdipScoreRank.begin() + cNumItems, comp); bool bChanged = false; // Create inverted rank lookup for(unsigned int i = 0; i < cNumItems; i++) { // Note: ranks are 1-based const unsigned int cNewRank = i + 1; if (!bChanged) { bChanged = (cNewRank != vecpdipScoreRank[i]->second); } // Store the rank with the corresponding score in the vecdipScoreRank array vecpdipScoreRank[i]->second = cNewRank; } return bChanged; } void CConc::Init ( unsigned long cMaxGroup, unsigned long cMaxItemsPerGroup, unsigned int cRankCutoff ) { CIRMeasure::Init(cMaxGroup, cMaxItemsPerGroup, cRankCutoff); veccPairCount.resize(cMaxGroup + 1, -1); } unsigned int CConc::PairCount(unsigned int iGroup, const double* const adY, unsigned int cNumItems) { if (iGroup >= veccPairCount.size()) { // Allocate additional space // (We should never get here if CPairwise::Initialize has been called before, as expected) veccPairCount.resize(iGroup + 1, -1); } if (veccPairCount[iGroup] < 0.0) { // Not yet initialized veccPairCount[iGroup] = ComputePairCount(adY, cNumItems); } return veccPairCount[iGroup]; } // Calculate the number of pairs with different labels, and store in veccPairCount // Assumption: instances are sorted such that labels are non-increasing int CConc::ComputePairCount(const double* const adY, unsigned int cNumItems) { if (!AnyPairs(adY, cNumItems)) { return 0; } double dLabelCurrent = adY[0]; int iLabelEnd = 0; // End of range with higher labels int cPairs = 0; for (unsigned int j = 1; j < cNumItems; j++) { if (adY[j] != dLabelCurrent) { // i.e., dYj < dLabelCurrent iLabelEnd = j; dLabelCurrent = adY[j]; 
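// Worked example: labels (1,1,0,0) give iLabelEnd = 2 at j = 2, so
// j = 2 and j = 3 each add 2 pairs below (j = 1 adds none): cPairs = 4.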
} // All items in 0 .. iLabelEnd - 1 are better than item j; // i.e, we have pairs (j,0), (j,1), ... (j, iLabelEnd - 1) cPairs += iLabelEnd; } return cPairs; } // Count the number of correctly ranked pairs with different labels double CConc::Measure(const double* const adY, const CRanker& ranker) { double dLabelCurrent = adY[0]; int iLabelEnd = 0; // End of the range with higher labels int cGoodPairs = 0; for (unsigned int j = 1; j < ranker.GetNumItems(); j++) { const double dYj = adY[j]; if (dYj != dLabelCurrent) { // i.e., dYj < dLabelCurrent iLabelEnd = j; dLabelCurrent = dYj; } // All items in 0 .. iLabelEnd - 1 are better than this item for (int i = 0; i < iLabelEnd; i++) { if (ranker.GetRank(i) < ranker.GetRank(j)) { cGoodPairs++; } } } return cGoodPairs; } double CConc::SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const { // Note: this implementation can handle arbitrary non-negative target values. // For binary (0/1) targets, the swap cost would reduce to the much simpler expression: // (int)ranker.GetRank(iItemBetter) - (int)ranker.GetRank(iItemWorse) const unsigned int cRankBetter = ranker.GetRank(iItemBetter); const unsigned int cRankWorse = ranker.GetRank(iItemWorse); // Which one of the two has the higher rank? unsigned int cRankUpper, cRankLower; double dYUpper, dYLower; int cDiff; if (cRankBetter > cRankWorse) { // Concordance increasing cRankUpper = cRankWorse; cRankLower = cRankBetter; dYUpper = adY[iItemWorse]; dYLower = adY[iItemBetter]; cDiff = 1; // The direct impact of the pair (iItemBetter, iItemWorse) } else { // Concordance decreasing cRankUpper = cRankBetter; cRankLower = cRankWorse; dYUpper = adY[iItemBetter]; dYLower = adY[iItemWorse]; cDiff = -1; // // The direct impact of the pair (iItemBetter, iItemWorse) } // Compute indirect impact for pairs involving items in between the two for (unsigned int cRank = cRankUpper + 1; cRank < cRankLower; cRank++) { const double dYi = adY[ranker.GetItem(cRank)]; double dScoreDiff = dYi - dYLower; if (dScoreDiff != 0) { cDiff += (dScoreDiff < 0) ? 1 : -1; } dScoreDiff = dYi - dYUpper; if (dScoreDiff != 0) { cDiff += (dScoreDiff < 0) ? -1 : 1; } } return cDiff; } void CNDCG::Init ( unsigned long cMaxGroup, unsigned long cMaxItemsPerGroup, unsigned int cRankCutoff ) { CIRMeasure::Init(cMaxGroup, cMaxItemsPerGroup, cRankCutoff); // Initialize rank weights (note: ranks are 1-based) vecdRankWeight.resize(cMaxItemsPerGroup + 1, 0.0); const unsigned int cMaxRank = std::min((unsigned int)cMaxItemsPerGroup, GetCutoffRank()); // Precompute rank weights for (unsigned int i = 1; i <= cMaxRank; i++) { vecdRankWeight[i] = log((double)2) / log((double)(i+1)); } // Allocate buffer vecdMaxDCG.resize(cMaxGroup + 1, -1.0); } // Sum of target values, weighted by rank weight double CNDCG::Measure(const double* const adY, const CRanker& ranker) { double dScore = 0; for (unsigned int i = 0; i < ranker.GetNumItems(); i++) { dScore += adY[i] * vecdRankWeight[ranker.GetRank(i)]; } return dScore; } double CNDCG::MaxMeasure(unsigned int iGroup, const double* const adY, unsigned int cNumItems) { if (iGroup >= vecdMaxDCG.size()) { // Allocate additional space // (We should never get here if CPairwise::Initialize has been called before, as expected) vecdMaxDCG.resize(iGroup + 1, -1.0); } if (vecdMaxDCG[iGroup] < 0.0) { // Not initialized if (!AnyPairs(adY, cNumItems)) { // No training pairs exist vecdMaxDCG[iGroup] = 0.0; } else { // Compute maximum possible DCG. 
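// Worked example, using the rank weights precomputed in Init(),
// w(r) = log(2)/log(r+1): presorted targets (3, 1, 0) give
// maxDCG = 3*w(1) + 1*w(2) = 3*1.0 + 1*0.6309... ~= 3.63; the trailing
// zero target is skipped by the early termination below.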
// Note: By assumption, items are pre-sorted by descending score. double dScore = 0; unsigned int i = 0; while (i < cNumItems && adY[i] > 0) { // Note: Due to sorting, we can terminate early for a zero score. dScore += adY[i] * vecdRankWeight[i + 1]; i++; } vecdMaxDCG[iGroup] = dScore; #ifdef NOISY_DEBUG if (vecdMaxDCG[iGroup] == 0) { Rprintf("max score is 0: iGroup = %d, maxScore = %f, sz = %d\n", iGroup, vecdMaxDCG[iGroup], ranker.GetNumItems()); assert(false); } #endif } } return vecdMaxDCG[iGroup]; } double CNDCG::SwapCost(int iItemBetter, int iItemWorse, const double* const adY, const CRanker& ranker) const { const unsigned int cRanki = ranker.GetRank(iItemBetter); const unsigned int cRankj = ranker.GetRank(iItemWorse); return (vecdRankWeight[cRanki] - vecdRankWeight[cRankj]) * (adY[iItemBetter] - adY[iItemWorse]); } // Auxiliary function to find the top rank of a positive item (cRankTop), and the number of positive items (cPos) inline void TopRankPos(const double* const adY, const CRanker& ranker, unsigned int& cRankTop, unsigned int& cPos) { const unsigned int cNumItems = ranker.GetNumItems(); cRankTop = cNumItems + 1; // Ranks are 1-based for (cPos = 0; cPos < cNumItems; cPos++) { if (adY[cPos] <= 0.0) { // All subsequent items are zero, because of presorting return; } cRankTop = min(cRankTop, ranker.GetRank(cPos)); } } double CMRR::Measure(const double* const adY, const CRanker& ranker) { unsigned int cRankTop, cPos; TopRankPos(adY, ranker, cRankTop, cPos); const unsigned int cNumItems = min(ranker.GetNumItems(), GetCutoffRank()); if (cRankTop >= cNumItems + 1) { // No positive item found return 0.0; } // Ranks start at 1 return 1.0 / cRankTop; } double CMRR::SwapCost(int iItemPos, int iItemNeg, const double* const adY, const CRanker& ranker) const { unsigned int cRankTop, cPos; TopRankPos(adY, ranker, cRankTop, cPos); const unsigned int cNumItems = ranker.GetNumItems(); if (cRankTop >= cNumItems + 1 // No positive item (ranks are 1-based) || cPos >= cNumItems) // No negative item { return 0.0; } const unsigned int cRankPos = ranker.GetRank(iItemPos); const unsigned int cRankNeg = ranker.GetRank(iItemNeg); const unsigned int cCutoffRank = GetCutoffRank(); const double dMeasureCurrent = (cRankTop > cCutoffRank) ? 0.0 : 1.0 / cRankTop; const double dMeasureNeg = (cRankNeg > cCutoffRank) ? 0.0 : 1.0 / cRankNeg; // Only pairs where the negative item is above the top positive result, // or else where the positive item *is* the top item, can change the MRR return ((cRankNeg < cRankTop || cRankPos == cRankTop) ? 
(dMeasureNeg - dMeasureCurrent) : 0.0); } void CMAP::Init ( unsigned long cMaxGroup, unsigned long cMaxItemsPerGroup, unsigned int cRankCutoff ) { CIRMeasure::Init(cMaxGroup, cMaxItemsPerGroup, cRankCutoff); // Allocate rank buffer (note: ranks are 1-based) veccRankPos.resize(cMaxItemsPerGroup + 1); } // Auxiliary function to find the sorted ranks of positive items (veccRankPos), and their number (cPos) inline void SortRankPos(const double* const adY, const CRanker& ranker, vector& veccRankPos, unsigned int& cPos) { // Store all ranks of positive items in veccRankPos for (cPos = 0; cPos < ranker.GetNumItems(); cPos++) { if (adY[cPos] <= 0.0) { // All subsequent items are zero, because of presorting break; } veccRankPos[cPos] = ranker.GetRank(cPos); } sort(veccRankPos.begin(), veccRankPos.begin() + cPos); } double CMAP::SwapCost(int iItemPos, int iItemNeg, const double* const adY, const CRanker& ranker) const { unsigned int cPos; SortRankPos(adY, ranker, veccRankPos, cPos); if (cPos == 0) { return 0.0; } // Now veccRankPos[i] is the i-th highest rank of a positive item, and // cPos is the total number of positive items. const int iRankItemPos = ranker.GetRank(iItemPos); const int iRankItemNeg = ranker.GetRank(iItemNeg); // Search for the position of the two items to swap const vector::iterator itItemPos = upper_bound(veccRankPos.begin(), veccRankPos.begin() + cPos, iRankItemPos); const vector::iterator itItemNeg = upper_bound(veccRankPos.begin(), veccRankPos.begin() + cPos, iRankItemNeg); // The number of positive items up to and including iItemPos const unsigned int cNumPosNotBelowItemPos = (unsigned int)(itItemPos - veccRankPos.begin()); // The number of positive items up to iItemNeg (Note: Cannot include iItemNeg itself) const unsigned int cNumPosAboveItemNeg = (unsigned int)(itItemNeg - veccRankPos.begin()); // Range of indices of positive items between iRankItemPos and iRankItemNeg (exclusively) int cIntermediateHigh, cIntermediateLow; // Current contribution of iItemPos double dContribBefore = (double) cNumPosNotBelowItemPos / iRankItemPos; double dSign, dContribAfter; if (iRankItemNeg > iRankItemPos) { // MAP is decreasing dSign = -1.0; // The first positive item after iRankItemPos cIntermediateLow = cNumPosNotBelowItemPos; // The last positive item before iRankItemNeg cIntermediateHigh = cNumPosAboveItemNeg - 1; // Note: iItemPos already counted in cNumPosAboveItemNeg dContribAfter = (double)cNumPosAboveItemNeg / iRankItemNeg; } else { // MAP is increasing dSign = 1.0; // The first positive result after iRankItemNeg cIntermediateLow = cNumPosAboveItemNeg; // The first positive result after iRankItemPos, minus iItemPos itself cIntermediateHigh = cNumPosNotBelowItemPos - 2; // Note: iItemPos not yet counted in cNumPosAboveItemNeg dContribAfter = (double) (cNumPosAboveItemNeg + 1) / iRankItemNeg; } // The direct effect of switching iItemPos double dDiff = dContribAfter - dContribBefore; // The indirect effect for all items in between the two items for (int j = cIntermediateLow; j <= cIntermediateHigh; j++) { dDiff += dSign / veccRankPos[j]; } return dDiff / cPos; } double CMAP::Measure(const double* const adY, const CRanker& ranker) { unsigned int cPos; SortRankPos(adY, ranker, veccRankPos, cPos); if (cPos == 0) { return 0.0; } // Now veccRankPos[i] is the i-th highest rank of a positive item double dPrec = 0.0; for (unsigned int j = 0; j < cPos; j++) { dPrec += double(j + 1) / veccRankPos[j]; } return dPrec / cPos; } CPairwise::CPairwise(const char* szIRMeasure) { // Construct the IR 
Measure if (!strcmp(szIRMeasure, "conc")) { pirm = new CConc(); } else if (!strcmp(szIRMeasure, "map")) { pirm = new CMAP(); } else if (!strcmp(szIRMeasure, "mrr")) { pirm = new CMRR(); } else { if (strcmp(szIRMeasure, "ndcg")) { Rprintf("Unknown IR measure '%s' in initialization, using 'ndcg' instead\n", szIRMeasure); } pirm = new CNDCG(); } } CPairwise::~CPairwise() { delete pirm; } // Auxiliary function for addition of optional offset parameter inline const double* OffsetVector(const double* const adX, const double* const adOffset, unsigned int iStart, unsigned int iEnd, vector& vecBuffer) { if (adOffset == NULL) { // Optional second argument is not set, just return first one return adX + iStart; } else { for (unsigned int i = iStart, iOut = 0; i < iEnd; i++, iOut++) { vecBuffer[iOut] = adX[i] + adOffset[i]; } return &vecBuffer[0]; } } GBMRESULT CPairwise::ComputeWorkingResponse ( double *adY, double *adGroup, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { #ifdef NOISY_DEBUG Rprintf("compute working response, nTrain = %u, cIdxOff = %d\n", nTrain, cIdxOff); #endif if (nTrain <= 0) { return GBM_OK; } try { // Iterate through all groups, compute gradients unsigned int iItemStart = 0; unsigned int iItemEnd = 0; while (iItemStart < nTrain) { adZ[iItemEnd] = 0; vecdHessian[iItemEnd] = 0; const double dGroup = adGroup[iItemStart]; // Find end of current group, initialize working response for (iItemEnd = iItemStart + 1; iItemEnd < nTrain && adGroup[iItemEnd] == dGroup; iItemEnd++) { // Clear gradients from last iteration adZ[iItemEnd] = 0; vecdHessian[iItemEnd] = 0; } #ifdef NOISY_DEBUG // Check sorting for (unsigned int i = iItemStart; i < iItemEnd-1; i++) { assert(adY[i] >= adY[i+1]); } #endif if (afInBag[iItemStart]) { // Group is part of the training set const int cNumItems = iItemEnd - iItemStart; // If offset given, add up current scores const double* adFPlusOffset = OffsetVector(adF, adOffset, iItemStart, iItemEnd, vecdFPlusOffset); // Accumulate gradients ComputeLambdas((int)dGroup, cNumItems, adY + iItemStart, adFPlusOffset, adWeight + iItemStart, adZ + iItemStart, &vecdHessian[iItemStart]); } // Next group iItemStart = iItemEnd; } } catch (std::bad_alloc&) { return GBM_OUTOFMEMORY; } return GBM_OK; } // Referring to MSR-TR-2010-82-2, section 7 (see also the vignette): // // Let P be the set of pairs (i,j) where Y(i)>Y(j) (i is better than j). // The approximation to the IR measure is the utility function C (to be maximized) // C // = \Sum_{(i,j) in P} |Delta Z_ij| C(s_i - s_j) // = \Sum_{(i,j) in P} |Delta Z_ij| / (1 + exp(-(s_i - s_j))), // where |Delta Z_ij| is the cost of swapping (only) i and j in the current ranking, // and s_i, s_j are the prediction scores (sum of the tree predictions) for items // i and j. // // For (i,j) in P, define // lambda_ij // = dC(s_i-s_j) / ds_i // = - |Delta Z_ij| / (1 + exp(s_i - s_j)) // = - |Delta Z_ij| * rho_ij, // with // rho_ij = - lambda_ij / |Delta Z_ij| = 1 / (1 + exp(s_i - s_j)) // // So the gradient of C with respect to s_i is // dC / ds_i // =(def) lambda_i // = \Sum_{j|(i,j) in P} lambda_ij - \Sum_{j|(j,i) in P} lambda_ji // = - \Sum_{j|(i,j) in P} |Delta Z_ij| * rho_ij // + \Sum_{j|(j,i) in P} |Delta Z_ji| * rho_ji; // it is stored in adZ[i]. // // The second derivative is // d^2C / ds_i^2 // =(def) gamma_i // = \Sum_{j|(i,j) in P} |Delta Z_ij| * rho_ij * (1-rho_ij) // - \Sum_{j|(j,i) in P} |Delta Z_ji| * rho_ji * (1-rho_ji); // it is stored in vecdHessian[i]. 
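//
// Worked example (illustrative only, not part of the implementation):
// suppose a group contains two items with Y(i) > Y(j), current scores
// s_i = 0.0 and s_j = 1.0 (the pair is misordered), and swap cost
// |Delta Z_ij| = 0.5. Then
//   rho_ij    = 1 / (1 + exp(s_i - s_j)) = 1 / (1 + exp(-1)) ~= 0.731,
//   lambda    = |Delta Z_ij| * rho_ij                        ~= 0.366,
// so ComputeLambdas() below does adZ[i] += 0.366 and adZ[j] -= 0.366,
// and both items receive the Hessian contribution
//   |Delta Z_ij| * rho_ij * (1 - rho_ij) ~= 0.098.
// The misordered pair thus pushes s_i up and s_j down, in proportion
// to how much the IR measure would gain from fixing the order.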
// // The Newton step for a particular leaf node is (a fraction of) // g'/g'', where g' (resp. g'') is the sum of dC/ds_i = lambda_i // (resp. d^2C/d^2s_i = gamma_i) over all instances falling into this leaf. This // summation is calculated later in CPairwise::FitBestConstant(). void CPairwise::ComputeLambdas(int iGroup, unsigned int cNumItems, const double* const adY, const double* const adF, const double* const adWeight, double* adZ, double* adDeriv) { // Assumption: Weights are constant within group if (adWeight[0] <= 0) { return; } // Normalize for maximum achievable group score const double dMaxScore = pirm->MaxMeasure(iGroup, adY, cNumItems); if (dMaxScore <= 0.0) { // No pairs return; } // Rank items by current score ranker.SetGroupScores(adF, cNumItems); ranker.Rank(); double dLabelCurrent = adY[0]; // First index of instance that has dLabelCurrent // (i.e., each smaller index corresponds to better item) unsigned int iLabelCurrentStart = 0; // Number of pairs with unequal labels unsigned int cPairs = 0; #ifdef NOISY_DEBUG double dMeasureBefore = pirm->Measure(adY, ranker); #endif for (unsigned int j = 1; j < cNumItems; j++) { const double dYj = adY[j]; if (dYj != dLabelCurrent) { iLabelCurrentStart = j; dLabelCurrent = dYj; } for (unsigned int i = 0; i < iLabelCurrentStart; i++) { // Instance i is better than j const double dSwapCost = fabs(pirm->SwapCost(i, j, adY, ranker)); #ifdef NOISY_DEBUG double dDelta = fabs(pirm->SwapCost(i, j, adY, ranker)); const int cRanki = ranker.GetRank(i); const int cRankj = ranker.GetRank(j); ranker.SetRank(i, cRankj); ranker.SetRank(j, cRanki); double dMeasureAfter = pirm->Measure(adY, ranker); if (fabs(dMeasureBefore-dMeasureAfter) - dDelta > 1e-5) { Rprintf("%f %f %f %f %f %d %d\n", pirm->SwapCost(i, j, adY, ranker), dMeasureBefore, dMeasureAfter, dMeasureBefore - dMeasureAfter, dDelta , i, j); for (unsigned int k = 0; k < cNumItems; k++) { Rprintf("%d\t%d\t%f\t%f\n", k, ranker.GetRank(k), adY[k], adF[k]); } assert(false); } assert(fabs(dMeasureBefore - dMeasureAfter) - fabs(dDelta) < 1e-5); ranker.SetRank(j, cRankj); ranker.SetRank(i, cRanki); #endif assert(isfinite(dSwapCost)); if (dSwapCost > 0.0) { cPairs++; const double dRhoij = 1.0 / (1.0 + exp(adF[i]- adF[j])) ; assert(isfinite(dRhoij)); const double dLambdaij = dSwapCost * dRhoij; adZ[i] += dLambdaij; adZ[j] -= dLambdaij; const double dDerivij = dLambdaij * (1.0 - dRhoij); assert(dDerivij >= 0); adDeriv[i] += dDerivij; adDeriv[j] += dDerivij; } } } if (cPairs > 0) { // Normalize for number of training pairs const double dQNorm = 1.0 / (dMaxScore * cPairs); for (unsigned int j = 0; j < cNumItems; j++) { adZ[j] *= dQNorm; adDeriv[j] *= dQNorm; } } } GBMRESULT CPairwise::Initialize ( double *adY, double *adGroup, double *adOffset, double *adWeight, unsigned long cLength ) { if (cLength <= 0) { return GBM_OK; } try { // Allocate memory for derivative buffer vecdHessian.resize(cLength); // Count the groups and number of items per group unsigned int cMaxItemsPerGroup = 0; double dMaxGroup = 0; unsigned int iItemStart = 0; unsigned int iItemEnd = 0; while (iItemStart < cLength) { const double dGroup = adGroup[iItemStart]; // Find end of current group for (iItemEnd = iItemStart + 1; iItemEnd < cLength && adGroup[iItemEnd] == dGroup; iItemEnd++); const unsigned int cNumItems = iItemEnd - iItemStart; if (cNumItems > cMaxItemsPerGroup) { cMaxItemsPerGroup = cNumItems; } if (dGroup > dMaxGroup) { dMaxGroup = dGroup; } // Next group iItemStart = iItemEnd; } // Allocate buffer for offset addition 
vecdFPlusOffset.resize(cMaxItemsPerGroup); // Allocate ranker memory ranker.Init(cMaxItemsPerGroup); // Allocate IR measure memory // The last element of adGroup specifies the cutoff // (zero means no cutoff) unsigned int cRankCutoff = cMaxItemsPerGroup; if (adGroup[cLength] > 0) { cRankCutoff = (unsigned int)adGroup[cLength]; } pirm->Init((unsigned long)dMaxGroup, cMaxItemsPerGroup, cRankCutoff); #ifdef NOISY_DEBUG Rprintf("Initialization: instances=%ld, groups=%u, max items per group=%u, rank cutoff=%u, offset specified: %d\n", cLength, (unsigned long)dMaxGroup, cMaxItemsPerGroup, cRankCutoff, (adOffset != NULL)); #endif } catch (std::bad_alloc&) { return GBM_OUTOFMEMORY; } return GBM_OK; } GBMRESULT CPairwise::InitF ( double *adY, double *adGroup, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength ) { dInitF = 0.0; return GBM_OK; } double CPairwise::Deviance ( double *adY, double *adGroup, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff ) { #ifdef NOISY_DEBUG Rprintf("Deviance, cLength = %u, cIdxOff = %d\n", cLength, cIdxOff); #endif if (cLength <= 0) { return 0; } double dL = 0.0; double dW = 0.0; unsigned int iItemStart = cIdxOff; unsigned int iItemEnd = iItemStart; const unsigned int cEnd = cLength + cIdxOff; while (iItemStart < cEnd) { const double dGroup = adGroup[iItemStart]; const double dWi = adWeight[iItemStart]; // Find end of current group for (iItemEnd = iItemStart + 1; iItemEnd < cEnd && adGroup[iItemEnd] == dGroup; iItemEnd++) ; const int cNumItems = iItemEnd - iItemStart; const double dMaxScore = pirm->MaxMeasure((int)dGroup, adY + iItemStart, cNumItems); if (dMaxScore > 0.0) { // Rank items by current score // If offset given, add up current scores const double* adFPlusOffset = OffsetVector(adF, adOffset, iItemStart, iItemEnd, vecdFPlusOffset); ranker.SetGroupScores(adFPlusOffset, cNumItems); ranker.Rank(); dL += dWi * pirm->Measure(adY + iItemStart, ranker) / dMaxScore; dW += dWi; } // Next group iItemStart = iItemEnd; } // Loss = 1 - utility return 1.0 - dL / dW; } GBMRESULT CPairwise::FitBestConstant ( double *adY, double *adGroup, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff ) { #ifdef NOISY_DEBUG Rprintf("FitBestConstant, nTrain = %u, cIdxOff = %d, cTermNodes = %d, \n", nTrain, cIdxOff, cTermNodes); #endif // Assumption: ComputeWorkingResponse() has been executed before with // the same arguments try { // Allocate space for numerators and denominators, and set to zero vecdNum.reserve(cTermNodes); vecdDenom.reserve(cTermNodes); for (unsigned int i = 0; i < cTermNodes; i++) { vecdNum[i] = 0.0; vecdDenom[i] = 0.0; } } catch (std::bad_alloc&) { return GBM_OUTOFMEMORY; } for (unsigned int iObs = 0; iObs < nTrain; iObs++) { if (afInBag[iObs]) { assert(isfinite(adW[iObs])); assert(isfinite(adZ[iObs])); assert(isfinite(vecdHessian[iObs])); vecdNum[aiNodeAssign[iObs]] += adW[iObs] * adZ[iObs]; vecdDenom[aiNodeAssign[iObs]] += adW[iObs] * vecdHessian[iObs]; } } for (unsigned int iNode = 0; iNode < cTermNodes; iNode++) { if (vecpTermNodes[iNode] != NULL) { vecpTermNodes[iNode]->dPrediction = vecdNum[iNode]; if (vecdDenom[iNode] <= 0.0) { vecpTermNodes[iNode]->dPrediction = 0.0; } else { vecpTermNodes[iNode]->dPrediction = vecdNum[iNode]/vecdDenom[iNode]; } } } return GBM_OK; } double CPairwise::BagImprovement ( double *adY, 
double *adGroup, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { #ifdef NOISY_DEBUG Rprintf("BagImprovement, nTrain = %u\n", nTrain); #endif if (nTrain <= 0) { return 0; } double dL = 0.0; double dW = 0.0; unsigned int iItemStart = 0; unsigned int iItemEnd = 0; while (iItemStart < nTrain) { const double dGroup = adGroup[iItemStart]; // Find end of current group for (iItemEnd = iItemStart + 1; iItemEnd < nTrain && adGroup[iItemEnd] == dGroup; iItemEnd++) ; if (!afInBag[iItemStart]) { // Group was held out of training set const unsigned int cNumItems = iItemEnd - iItemStart; const double dMaxScore = pirm->MaxMeasure((int)dGroup, adY + iItemStart, cNumItems); if (dMaxScore > 0.0) { // If offset given, add up current scores const double* adFPlusOffset = OffsetVector(adF, adOffset, iItemStart, iItemEnd, vecdFPlusOffset); // Compute score according to old score, adF ranker.SetGroupScores(adFPlusOffset, cNumItems); ranker.Rank(); const double dOldScore = pirm->Measure(adY + iItemStart, ranker); // Compute score according to new score: adF' = adF + dStepSize * adFadj for (unsigned int i = 0; i < cNumItems; i++) { ranker.AddToScore(i, adFadj[i+iItemStart] * dStepSize); } const double dWi = adWeight[iItemStart]; if (ranker.Rank()) { // Ranking changed const double dNewScore = pirm->Measure(adY + iItemStart, ranker); dL += dWi * (dNewScore - dOldScore) / dMaxScore; } dW += dWi; } } // Next group iItemStart = iItemEnd; } return dL / dW; } gbm/src/distribution.cpp0000644000176000001440000000022613064145661015067 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "distribution.h" CDistribution::CDistribution() { } CDistribution::~CDistribution() { } gbm/src/node_continuous.cpp0000644000176000001440000001211213064145661015560 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "node_continuous.h" #include "node_factory.h" CNodeContinuous::CNodeContinuous() { dSplitValue = 0.0; } CNodeContinuous::~CNodeContinuous() { #ifdef NOISY_DEBUG Rprintf("continuous destructor\n"); #endif } GBMRESULT CNodeContinuous::PrintSubtree ( unsigned long cIndent ) { GBMRESULT hr = GBM_OK; unsigned long i = 0; for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("N=%f, Improvement=%f, Prediction=%f, NA pred=%f\n", dTrainW, dImprovement, dPrediction, (pMissingNode == NULL ? 
0.0 : pMissingNode->dPrediction)); for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("V%d < %f\n", iSplitVar, dSplitValue); hr = pLeftNode->PrintSubtree(cIndent+1); for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("V%d > %f\n", iSplitVar, dSplitValue); hr = pRightNode->PrintSubtree(cIndent+1); for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("missing\n"); hr = pMissingNode->PrintSubtree(cIndent+1); return hr; } signed char CNodeContinuous::WhichNode ( CDataset *pData, unsigned long iObs ) { signed char ReturnValue = 0; double dX = pData->adX[iSplitVar*(pData->cRows) + iObs]; if(!ISNA(dX)) { if(dX < dSplitValue) { ReturnValue = -1; } else { ReturnValue = 1; } } // if missing value returns 0 return ReturnValue; } signed char CNodeContinuous::WhichNode ( double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow ) { signed char ReturnValue = 0; double dX = adX[iSplitVar*cRow + iRow]; if(!ISNA(dX)) { if(dX < dSplitValue) { ReturnValue = -1; } else { ReturnValue = 1; } } // if missing value returns 0 return ReturnValue; } GBMRESULT CNodeContinuous::RecycleSelf ( CNodeFactory *pNodeFactory ) { GBMRESULT hr = GBM_OK; pNodeFactory->RecycleNode(this); return hr; }; GBMRESULT CNodeContinuous::TransferTreeToRList ( int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage ) { GBMRESULT hr = GBM_OK; int iThisNodeID = iNodeID; aiSplitVar[iThisNodeID] = iSplitVar; adSplitPoint[iThisNodeID] = dSplitValue; adErrorReduction[iThisNodeID] = dImprovement; adWeight[iThisNodeID] = dTrainW; adPred[iThisNodeID] = dShrinkage*dPrediction; iNodeID++; aiLeftNode[iThisNodeID] = iNodeID; hr = pLeftNode->TransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); if(GBM_FAILED(hr)) goto Error; aiRightNode[iThisNodeID] = iNodeID; hr = pRightNode->TransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); if(GBM_FAILED(hr)) goto Error; aiMissingNode[iThisNodeID] = iNodeID; hr = pMissingNode->TransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); if(GBM_FAILED(hr)) goto Error; Cleanup: return hr; Error: goto Cleanup; } gbm/src/node.h0000644000176000001440000000670113064145661012746 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: node.h // // License: GNU GPL (version 2 or later) // // Contents: a node in the tree // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef NODGBM_H #define NODGBM_H #include #include "dataset.h" #include "buildinfo.h" class CNodeFactory; using namespace std; typedef vector VEC_CATEGORIES; typedef vector VEC_VEC_CATEGORIES; class CNode { public: CNode(); virtual ~CNode(); virtual GBMRESULT Adjust(unsigned long cMinObsInNode); virtual GBMRESULT Predict(CDataset *pData, unsigned long iRow, double &dFadj); virtual GBMRESULT Predict(double *adX, unsigned long cRow, unsigned long 
cCol, unsigned long iRow, double &dFadj) = 0; static double Improvement ( double dLeftW, double dRightW, double dMissingW, double dLeftSum, double dRightSum, double dMissingSum ) { double dTemp = 0.0; double dResult = 0.0; if(dMissingW == 0.0) { dTemp = dLeftSum/dLeftW - dRightSum/dRightW; dResult = dLeftW*dRightW*dTemp*dTemp/(dLeftW+dRightW); } else { dTemp = dLeftSum/dLeftW - dRightSum/dRightW; dResult += dLeftW*dRightW*dTemp*dTemp; dTemp = dLeftSum/dLeftW - dMissingSum/dMissingW; dResult += dLeftW*dMissingW*dTemp*dTemp; dTemp = dRightSum/dRightW - dMissingSum/dMissingW; dResult += dRightW*dMissingW*dTemp*dTemp; dResult /= (dLeftW + dRightW + dMissingW); } return dResult; } virtual GBMRESULT PrintSubtree(unsigned long cIndent); virtual GBMRESULT TransferTreeToRList(int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage); double TotalError(); virtual GBMRESULT GetVarRelativeInfluence(double *adRelInf); virtual GBMRESULT RecycleSelf(CNodeFactory *pNodeFactory) = 0; double dPrediction; double dTrainW; // total training weight in node unsigned long cN; // number of training observations in node bool isTerminal; protected: double GetXEntry(CDataset *pData, unsigned long iRow, unsigned long iCol) { return pData->adX[iCol*(pData->cRows) + iRow]; } }; typedef CNode *PCNode; #endif // NODGBM_H gbm/src/node_nonterminal.h0000644000176000001440000000470713064145661015360 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: node_nonterminal.h // // License: GNU GPL (version 2 or later) // // Contents: a node in the tree // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef NODENONTERMINAL_H #define NODENONTERMINAL_H #include "node.h" #include "node_terminal.h" class CNodeNonterminal : public CNode { public: CNodeNonterminal(); virtual ~CNodeNonterminal(); virtual GBMRESULT Adjust(unsigned long cMinObsInNode); virtual signed char WhichNode(CDataset *pData, unsigned long iObs) = 0; virtual signed char WhichNode(double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow) = 0; virtual GBMRESULT TransferTreeToRList(int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage) = 0; GBMRESULT Predict(CDataset *pData, unsigned long iRow, double &dFadj); GBMRESULT Predict(double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow, double &dFadj); GBMRESULT GetVarRelativeInfluence(double *adRelInf); virtual GBMRESULT RecycleSelf(CNodeFactory *pNodeFactory) = 0; CNode *pLeftNode; CNode *pRightNode; CNode *pMissingNode; unsigned long iSplitVar; double dImprovement; }; typedef CNodeNonterminal *PCNodeNonterminal; #endif // NODENONTERMINAL_H gbm/src/node_factory.cpp0000644000176000001440000000673313064145661015035 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "node_factory.h" CNodeFactory::CNodeFactory() { } CNodeFactory::~CNodeFactory() { #ifdef NOISY_DEBUG Rprintf("destructing node factory\n"); #endif } GBMRESULT 
CNodeFactory::Initialize ( unsigned long cDepth ) { GBMRESULT hr = GBM_OK; unsigned long i = 0; for(i=0; idPrediction = 0.0; } return pNodeTerminalTemp; } CNodeContinuous* CNodeFactory::GetNewNodeContinuous() { if(ContinuousStack.empty()) { #ifdef NOISY_DEBUG Rprintf("Continuous stack is empty\n"); #endif pNodeContinuousTemp = NULL; } else { pNodeContinuousTemp = ContinuousStack.top(); ContinuousStack.pop(); pNodeContinuousTemp->dPrediction = 0.0; pNodeContinuousTemp->dImprovement = 0.0; pNodeContinuousTemp->pMissingNode = NULL; pNodeContinuousTemp->pLeftNode = NULL; pNodeContinuousTemp->pRightNode = NULL; pNodeContinuousTemp->iSplitVar = 0; pNodeContinuousTemp->dSplitValue = 0.0; } return pNodeContinuousTemp; } CNodeCategorical* CNodeFactory::GetNewNodeCategorical() { if(CategoricalStack.empty()) { #ifdef NOISY_DEBUG Rprintf("Categorical stack is empty\n"); #endif pNodeCategoricalTemp = NULL; } else { pNodeCategoricalTemp = CategoricalStack.top(); CategoricalStack.pop(); pNodeCategoricalTemp->dPrediction = 0.0; pNodeCategoricalTemp->dImprovement = 0.0; pNodeCategoricalTemp->pMissingNode = NULL; pNodeCategoricalTemp->pLeftNode = NULL; pNodeCategoricalTemp->pRightNode = NULL; pNodeCategoricalTemp->iSplitVar = 0; pNodeCategoricalTemp->aiLeftCategory = NULL; pNodeCategoricalTemp->cLeftCategory = 0; } return pNodeCategoricalTemp; } GBMRESULT CNodeFactory::RecycleNode ( CNodeTerminal *pNode ) { if(pNode != NULL) { TerminalStack.push(pNode); } return GBM_OK; } GBMRESULT CNodeFactory::RecycleNode ( CNodeContinuous *pNode ) { if(pNode != NULL) { if(pNode->pLeftNode != NULL) pNode->pLeftNode->RecycleSelf(this); if(pNode->pRightNode != NULL) pNode->pRightNode->RecycleSelf(this); if(pNode->pMissingNode != NULL) pNode->pMissingNode->RecycleSelf(this); ContinuousStack.push(pNode); } return GBM_OK; } GBMRESULT CNodeFactory::RecycleNode ( CNodeCategorical *pNode ) { if(pNode != NULL) { if(pNode->pLeftNode != NULL) pNode->pLeftNode->RecycleSelf(this); if(pNode->pRightNode != NULL) pNode->pRightNode->RecycleSelf(this); if(pNode->pMissingNode != NULL) pNode->pMissingNode->RecycleSelf(this); if(pNode->aiLeftCategory != NULL) { delete [] pNode->aiLeftCategory; pNode->aiLeftCategory = NULL; } CategoricalStack.push(pNode); } return GBM_OK; } gbm/src/node_search.cpp0000644000176000001440000003045013064145661014624 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: node_search.cpp // //------------------------------------------------------------------------------ #include "node_search.h" CNodeSearch::CNodeSearch() :k_cMaxClasses(1024) { iBestSplitVar = 0; dBestSplitValue = 0.0; fIsSplit = false; dBestMissingTotalW = 0.0; dCurrentMissingTotalW = 0.0; dBestMissingSumZ = 0.0; dCurrentMissingSumZ = 0.0; adGroupSumZ = NULL; adGroupW = NULL; acGroupN = NULL; adGroupMean = NULL; aiCurrentCategory = NULL; aiBestCategory = NULL; iRank = UINT_MAX; } CNodeSearch::~CNodeSearch() { if(adGroupSumZ != NULL) { delete [] adGroupSumZ; adGroupSumZ = NULL; } if(adGroupW != NULL) { delete [] adGroupW; adGroupW = NULL; } if(acGroupN != NULL) { delete [] acGroupN; acGroupN = NULL; } if(adGroupMean != NULL) { delete [] adGroupMean; adGroupMean = NULL; } if(aiCurrentCategory != NULL) { delete [] aiCurrentCategory; aiCurrentCategory = NULL; } if(aiBestCategory != NULL) { delete [] aiBestCategory; aiBestCategory = NULL; } } GBMRESULT CNodeSearch::Initialize ( unsigned long cMinObsInNode ) { GBMRESULT hr = GBM_OK; adGroupSumZ = new 
double[k_cMaxClasses]; if(adGroupSumZ == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } adGroupW = new double[k_cMaxClasses]; if(adGroupW == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } acGroupN = new ULONG[k_cMaxClasses]; if(acGroupN == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } adGroupMean = new double[k_cMaxClasses]; if(adGroupMean == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } aiCurrentCategory = new int[k_cMaxClasses]; if(aiCurrentCategory == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } aiBestCategory = new ULONG[k_cMaxClasses]; if(aiBestCategory == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } this->cMinObsInNode = cMinObsInNode; Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CNodeSearch::IncorporateObs ( double dX, double dZ, double dW, long lMonotone ) { GBMRESULT hr = GBM_OK; static double dWZ = 0.0; if(fIsSplit) goto Cleanup; dWZ = dW*dZ; if(ISNA(dX)) { dCurrentMissingSumZ += dWZ; dCurrentMissingTotalW += dW; cCurrentMissingN++; dCurrentRightSumZ -= dWZ; dCurrentRightTotalW -= dW; cCurrentRightN--; } else if(cCurrentVarClasses == 0) // variable is continuous { if(dLastXValue > dX) { error("Observations are not in order. gbm() was unable to build an index for the design matrix. Could be a bug in gbm or an unusual data type in data.\n"); hr = GBM_FAIL; goto Error; } // Evaluate the current split // the newest observation is still in the right child dCurrentSplitValue = 0.5*(dLastXValue + dX); if((dLastXValue != dX) && (cCurrentLeftN >= cMinObsInNode) && (cCurrentRightN >= cMinObsInNode) && ((lMonotone==0) || (lMonotone*(dCurrentRightSumZ*dCurrentLeftTotalW - dCurrentLeftSumZ*dCurrentRightTotalW) > 0))) { dCurrentImprovement = CNode::Improvement(dCurrentLeftTotalW,dCurrentRightTotalW, dCurrentMissingTotalW, dCurrentLeftSumZ,dCurrentRightSumZ, dCurrentMissingSumZ); if(dCurrentImprovement > dBestImprovement) { iBestSplitVar = iCurrentSplitVar; dBestSplitValue = dCurrentSplitValue; cBestVarClasses = 0; dBestLeftSumZ = dCurrentLeftSumZ; dBestLeftTotalW = dCurrentLeftTotalW; cBestLeftN = cCurrentLeftN; dBestRightSumZ = dCurrentRightSumZ; dBestRightTotalW = dCurrentRightTotalW; cBestRightN = cCurrentRightN; dBestImprovement = dCurrentImprovement; } } // now move the new observation to the left // if another observation arrives we will evaluate this dCurrentLeftSumZ += dWZ; dCurrentLeftTotalW += dW; cCurrentLeftN++; dCurrentRightSumZ -= dWZ; dCurrentRightTotalW -= dW; cCurrentRightN--; dLastXValue = dX; } else // variable is categorical, evaluates later { adGroupSumZ[(unsigned long)dX] += dWZ; adGroupW[(unsigned long)dX] += dW; acGroupN[(unsigned long)dX] ++; } Cleanup: return hr; Error: goto Cleanup; } GBMRESULT CNodeSearch::Set ( double dSumZ, double dTotalW, unsigned long cTotalN, CNodeTerminal *pThisNode, CNode **ppParentPointerToThisNode, CNodeFactory *pNodeFactory ) { GBMRESULT hr = GBM_OK; dInitSumZ = dSumZ; dInitTotalW = dTotalW; cInitN = cTotalN; dBestLeftSumZ = 0.0; dBestLeftTotalW = 0.0; cBestLeftN = 0; dCurrentLeftSumZ = 0.0; dCurrentLeftTotalW = 0.0; cCurrentLeftN = 0; dBestRightSumZ = dSumZ; dBestRightTotalW = dTotalW; cBestRightN = cTotalN; dCurrentRightSumZ = 0.0; dCurrentRightTotalW = dTotalW; cCurrentRightN = cTotalN; dBestMissingSumZ = 0.0; dBestMissingTotalW = 0.0; cBestMissingN = 0; dCurrentMissingSumZ = 0.0; dCurrentMissingTotalW = 0.0; cCurrentMissingN = 0; dBestImprovement = 0.0; iBestSplitVar = UINT_MAX; dCurrentImprovement = 0.0; iCurrentSplitVar = UINT_MAX; dCurrentSplitValue = -HUGE_VAL; fIsSplit = false; this->pThisNode = pThisNode; 
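// Orientation note (illustrative, summarizing the logic above): Set()
// re-initializes the search state for one terminal node, with all of the
// weight and gradient mass starting in the "right" child. IncorporateObs()
// then moves observations to the left one at a time, in sorted x order,
// evaluating a candidate split at each distinct x value. For example, if
// the previous observation sat at x = 1 and the next arrives at x = 3, the
// candidate split point is 0.5 * (1 + 3) = 2; it is retained only if both
// children hold at least cMinObsInNode observations, any monotonicity
// constraint is satisfied, and the improvement beats dBestImprovement.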
this->ppParentPointerToThisNode = ppParentPointerToThisNode; this->pNodeFactory = pNodeFactory; return hr; } GBMRESULT CNodeSearch::ResetForNewVar ( unsigned long iWhichVar, long cCurrentVarClasses ) { GBMRESULT hr = GBM_OK; long i=0; if(fIsSplit) goto Cleanup; for(i=0; icCurrentVarClasses = cCurrentVarClasses; dCurrentLeftSumZ = 0.0; dCurrentLeftTotalW = 0.0; cCurrentLeftN = 0; dCurrentRightSumZ = dInitSumZ; dCurrentRightTotalW = dInitTotalW; cCurrentRightN = cInitN; dCurrentMissingSumZ = 0.0; dCurrentMissingTotalW = 0.0; cCurrentMissingN = 0; dCurrentImprovement = 0.0; dLastXValue = -HUGE_VAL; Cleanup: return hr; } GBMRESULT CNodeSearch::WrapUpCurrentVariable() { GBMRESULT hr = GBM_OK; if(iCurrentSplitVar == iBestSplitVar) { if(cCurrentMissingN > 0) { dBestMissingSumZ = dCurrentMissingSumZ; dBestMissingTotalW = dCurrentMissingTotalW; cBestMissingN = cCurrentMissingN; } else // DEBUG: consider a weighted average with parent node? { dBestMissingSumZ = dInitSumZ; dBestMissingTotalW = dInitTotalW; cBestMissingN = 0; } } return hr; } GBMRESULT CNodeSearch::EvaluateCategoricalSplit() { GBMRESULT hr = GBM_OK; long i=0; long j=0; unsigned long cFiniteMeans = 0; if(fIsSplit) goto Cleanup; if(cCurrentVarClasses == 0) { hr = GBM_INVALIDARG; goto Error; } cFiniteMeans = 0; for(i=0; i1) && ((ULONG)i= cMinObsInNode) && (cCurrentRightN >= cMinObsInNode) && (dCurrentImprovement > dBestImprovement)) { dBestSplitValue = dCurrentSplitValue; if(iBestSplitVar != iCurrentSplitVar) { iBestSplitVar = iCurrentSplitVar; cBestVarClasses = cCurrentVarClasses; for(j=0; jGetNewNodeTerminal(); pNewRightNode = pNodeFactory->GetNewNodeTerminal(); pNewMissingNode = pNodeFactory->GetNewNodeTerminal(); // set up a continuous split if(cBestVarClasses==0) { pNewNodeContinuous = pNodeFactory->GetNewNodeContinuous(); pNewNodeContinuous->dSplitValue = dBestSplitValue; pNewNodeContinuous->iSplitVar = iBestSplitVar; pNewSplitNode = pNewNodeContinuous; } else { // get a new categorical node and its branches pNewNodeCategorical = pNodeFactory->GetNewNodeCategorical(); // set up the categorical split pNewNodeCategorical->iSplitVar = iBestSplitVar; pNewNodeCategorical->cLeftCategory = (ULONG)dBestSplitValue + 1; pNewNodeCategorical->aiLeftCategory = new ULONG[pNewNodeCategorical->cLeftCategory]; for(i=0; icLeftCategory; i++) { pNewNodeCategorical->aiLeftCategory[i] = aiBestCategory[i]; } pNewSplitNode = pNewNodeCategorical; } *ppParentPointerToThisNode = pNewSplitNode; pNewSplitNode->dPrediction = pThisNode->dPrediction; pNewSplitNode->dImprovement = dBestImprovement; pNewSplitNode->dTrainW = pThisNode->dTrainW; pNewSplitNode->pLeftNode = pNewLeftNode; pNewSplitNode->pRightNode = pNewRightNode; pNewSplitNode->pMissingNode = pNewMissingNode; pNewLeftNode->dPrediction = dBestLeftSumZ/dBestLeftTotalW; pNewLeftNode->dTrainW = dBestLeftTotalW; pNewLeftNode->cN = cBestLeftN; pNewRightNode->dPrediction = dBestRightSumZ/dBestRightTotalW; pNewRightNode->dTrainW = dBestRightTotalW; pNewRightNode->cN = cBestRightN; pNewMissingNode->dPrediction = dBestMissingSumZ/dBestMissingTotalW; pNewMissingNode->dTrainW = dBestMissingTotalW; pNewMissingNode->cN = cBestMissingN; pThisNode->RecycleSelf(pNodeFactory); return hr; } gbm/src/node.cpp0000644000176000001440000000224013064145661013273 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "node.h" CNode::CNode() { dPrediction = 0.0; dTrainW = 0.0; isTerminal = false; } CNode::~CNode() { // the nodes get deleted by deleting the node factory } GBMRESULT CNode::Adjust ( unsigned long 
cMinObsInNode ) { GBMRESULT hr = GBM_NOTIMPL; return hr; } GBMRESULT CNode::Predict ( CDataset *pData, unsigned long iRow, double &dFadj ) { GBMRESULT hr = GBM_NOTIMPL; return hr; } double CNode::TotalError() { GBMRESULT hr = GBM_NOTIMPL; return hr; } GBMRESULT CNode::PrintSubtree ( unsigned long cIndent ) { GBMRESULT hr = GBM_NOTIMPL; return hr; } GBMRESULT CNode::GetVarRelativeInfluence ( double *adRelInf ) { GBMRESULT hr = GBM_NOTIMPL; return hr; } GBMRESULT CNode::TransferTreeToRList ( int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage ) { return GBM_NOTIMPL; } gbm/src/bernoulli.cpp0000644000176000001440000001071513064145661014347 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "bernoulli.h" CBernoulli::CBernoulli() { } CBernoulli::~CBernoulli() { } GBMRESULT CBernoulli::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; double dProb = 0.0; double dF = 0.0; for(i=0; i 0.0001) { dNum=0.0; dDen=0.0; for(i=0; idPrediction = 0.0; } else { vecpTermNodes[iNode]->dPrediction = vecdNum[iNode]/vecdDen[iNode]; } } } return hr; } double CBernoulli::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; for(i=0; idAlpha = dAlpha; } CQuantile::~CQuantile() { } GBMRESULT CQuantile::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; if(adOffset == NULL) { for(i=0; i adF[i]) ? dAlpha : -(1.0-dAlpha); } } else { for(i=0; i adF[i]+adOffset[i]) ? 
dAlpha : -(1.0-dAlpha); } } return GBM_OK; } // DEBUG: needs weighted quantile GBMRESULT CQuantile::InitF ( double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength ) { double dOffset=0.0; unsigned long i=0; vecd.resize(cLength); for(i=0; i adF[i]) { dL += adWeight[i]*dAlpha *(adY[i] - adF[i]); } else { dL += adWeight[i]*(1.0-dAlpha)*(adF[i] - adY[i]); } dW += adWeight[i]; } } else { for(i=cIdxOff; i adF[i] + adOffset[i]) { dL += adWeight[i]*dAlpha *(adY[i] - adF[i]-adOffset[i]); } else { dL += adWeight[i]*(1.0-dAlpha)*(adF[i]+adOffset[i] - adY[i]); } dW += adWeight[i]; } } return dL/dW; } // DEBUG: needs weighted quantile GBMRESULT CQuantile::FitBestConstant ( double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff ) { GBMRESULT hr = GBM_OK; unsigned long iNode = 0; unsigned long iObs = 0; unsigned long iVecd = 0; double dOffset; vecd.resize(nTrain); // should already be this size from InitF for(iNode=0; iNodecN >= cMinObsInNode) { iVecd = 0; for(iObs=0; iObsdPrediction = *max_element(vecd.begin(), vecd.begin()+iVecd); } else { nth_element(vecd.begin(), vecd.begin() + int(iVecd*dAlpha), vecd.begin() + int(iVecd)); vecpTermNodes[iNode]->dPrediction = *(vecd.begin() + int(iVecd*dAlpha)); } } } return hr; } double CQuantile::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; for(i=0; i dF) { dReturnValue += adWeight[i]*dAlpha*(adY[i]-dF); } else { dReturnValue += adWeight[i]*(1-dAlpha)*(dF-adY[i]); } if(adY[i] > dF+dStepSize*adFadj[i]) { dReturnValue -= adWeight[i]*dAlpha* (adY[i] - dF-dStepSize*adFadj[i]); } else { dReturnValue -= adWeight[i]*(1-dAlpha)* (dF+dStepSize*adFadj[i] - adY[i]); } dW += adWeight[i]; } } return dReturnValue/dW; } gbm/src/huberized.h0000644000176000001440000000555313064145661014006 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: bernoulli.h // // License: GNU GPL (version 2 or later) // // Contents: bernoulli object // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef HUBERIZED_H #define HUBERIZED_H #include "distribution.h" #include "buildinfo.h" class CHuberized : public CDistribution { public: CHuberized(); virtual ~CHuberized(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, 
unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector vecdNum; vector vecdDen; }; #endif // HUBERIZED_H gbm/src/node_search.h0000644000176000001440000000625113064145661014273 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: node_search.h // // License: GNU GPL (version 2 or later) // // Contents: does the searching for where to split a node // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef NODESEARCH_H #define NODESEARCH_H #include "node_factory.h" #include "dataset.h" using namespace std; class CNodeSearch { public: CNodeSearch(); ~CNodeSearch(); GBMRESULT Initialize(unsigned long cMinObsInNode); GBMRESULT IncorporateObs(double dX, double dZ, double dW, long lMonotone); GBMRESULT Set(double dSumZ, double dTotalW, unsigned long cTotalN, CNodeTerminal *pThisNode, CNode **ppParentPointerToThisNode, CNodeFactory *pNodeFactory); GBMRESULT ResetForNewVar(unsigned long iWhichVar, long cVarClasses); double BestImprovement() { return dBestImprovement; } GBMRESULT SetToSplit() { fIsSplit = true; return GBM_OK; }; GBMRESULT SetupNewNodes(PCNodeNonterminal &pNewSplitNode, PCNodeTerminal &pNewLeftNode, PCNodeTerminal &pNewRightNode, PCNodeTerminal &pNewMissingNode); GBMRESULT EvaluateCategoricalSplit(); GBMRESULT WrapUpCurrentVariable(); double ThisNodePrediction() {return pThisNode->dPrediction;} bool operator<(const CNodeSearch &ns) {return dBestImprovement #include #include #include "dataset.h" #include "node_factory.h" #include "node_search.h" class CCARTTree { public: CCARTTree(); ~CCARTTree(); GBMRESULT Initialize(CNodeFactory *pNodeFactory); GBMRESULT grow(double *adZ, CDataset *pData, double *adAlgW, double *adF, unsigned long nTrain, unsigned long nBagged, double dLambda, unsigned long cMaxDepth, unsigned long cMinObsInNode, bool *afInBag, unsigned long *aiNodeAssign, CNodeSearch *aNodeSearch, VEC_P_NODETERMINAL &vecpTermNodes); GBMRESULT Reset(); GBMRESULT TransferTreeToRList(CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage); GBMRESULT PredictValid(CDataset *pData, unsigned long nValid, double *adFadj); GBMRESULT Predict(double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow, double &dFadj); GBMRESULT Adjust(unsigned long *aiNodeAssign, double *adFadj, unsigned long cTrain, VEC_P_NODETERMINAL &vecpTermNodes, unsigned long cMinObsInNode); GBMRESULT GetNodeCount(int &cNodes); GBMRESULT SetShrinkage(double dShrink) { this->dShrink = dShrink; return GBM_OK; } double GetShrinkage() {return dShrink;} GBMRESULT Print(); GBMRESULT GetVarRelativeInfluence(double *adRelInf); double dError; // total squared error before carrying out the splits private: GBMRESULT GetBestSplit(CDataset *pData, unsigned long nTrain, CNodeSearch *aNodeSearch, unsigned long cTerminalNodes, unsigned long *aiNodeAssign, bool *afInBag, double *adZ, double *adW, unsigned long &iBestNode, double &dBestNodeImprovement); CNode 
*pRootNode; double dShrink; // objects used repeatedly unsigned long cDepth; unsigned long cTerminalNodes; unsigned long cTotalNodeCount; unsigned long iObs; unsigned long iWhichNode; unsigned long iBestNode; double dBestNodeImprovement; double dSumZ; double dSumZ2; double dTotalW; signed char schWhichNode; CNodeFactory *pNodeFactory; CNodeNonterminal *pNewSplitNode; CNodeTerminal *pNewLeftNode; CNodeTerminal *pNewRightNode; CNodeTerminal *pNewMissingNode; CNodeTerminal *pInitialRootNode; }; typedef CCARTTree *PCCARTTree; #endif // TREGBM_H gbm/src/poisson.cpp0000644000176000001440000001234113064145661014043 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "poisson.h" CPoisson::CPoisson() { } CPoisson::~CPoisson() { } GBMRESULT CPoisson::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; double dF = 0.0; // compute working response for(i=0; i < nTrain; i++) { dF = adF[i] + ((adOffset==NULL) ? 0.0 : adOffset[i]); adZ[i] = adY[i] - exp(dF); } return GBM_OK; } GBMRESULT CPoisson::InitF ( double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength ) { GBMRESULT hr = GBM_OK; double dSum = 0.0; double dDenom = 0.0; unsigned long i = 0; if(adOffset == NULL) { for(i=0; idPrediction = -19.0; } else if(vecdDen[iNode] == 0.0) { vecpTermNodes[iNode]->dPrediction = 0.0; } else { vecpTermNodes[iNode]->dPrediction = log(vecdNum[iNode]/vecdDen[iNode]); } vecpTermNodes[iNode]->dPrediction = fmin2(vecpTermNodes[iNode]->dPrediction, 19-vecdMax[iNode]); vecpTermNodes[iNode]->dPrediction = fmax2(vecpTermNodes[iNode]->dPrediction, -19-vecdMin[iNode]); } } return hr; } double CPoisson::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; for(i=0; i #include "distribution.h" #include "locationm.h" class CMultinomial : public CDistribution { public: CMultinomial(int cNumClasses, int cRows); virtual ~CMultinomial(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength); GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: unsigned long mcNumClasses; unsigned long mcRows; double *madProb; }; #endif // KMULTICGBM_H gbm/src/adaboost.cpp0000644000176000001440000001027413064145661014150 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "adaboost.h" CAdaBoost::CAdaBoost() { } CAdaBoost::~CAdaBoost() { } GBMRESULT 
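// Background for the working response below (standard exponential-loss
// algebra, stated here for orientation; see the code for the exact
// handling of offsets): AdaBoost fits the loss
//   L(y, F) = exp(-(2y - 1) F),  y in {0, 1},
// whose negative gradient with respect to F is
//   z = (2y - 1) * exp(-(2y - 1) F).
// E.g. a positive example (y = 1) with F = 0 gets z = 1, while the same
// example with a confident correct score F = 2 gets z = exp(-2) ~= 0.135,
// so well-classified observations contribute little to the next tree.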
CAdaBoost::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; if(adOffset == NULL) { for(i=0; idPrediction = 0.0; } else { vecpTermNodes[iNode]->dPrediction = vecdNum[iNode]/vecdDen[iNode]; } } } return hr; } double CAdaBoost::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; for(i=0; i #include #include "node_nonterminal.h" class CNodeCategorical : public CNodeNonterminal { public: CNodeCategorical(); ~CNodeCategorical(); GBMRESULT PrintSubtree(unsigned long cIndent); GBMRESULT TransferTreeToRList(int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage); signed char WhichNode(CDataset *pData, unsigned long iObs); signed char WhichNode(double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow); GBMRESULT RecycleSelf(CNodeFactory *pNodeFactory); unsigned long *aiLeftCategory; unsigned long cLeftCategory; }; typedef CNodeCategorical *PCNodeCategorical; #endif // NODECATEGORICAL_H gbm/src/node_factory.h0000644000176000001440000000317513064145661014477 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: node_factory.h // // License: GNU GPL (version 2 or later) // // Contents: manager for allocation and destruction of all nodes // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef NODEFACTORY_H #define NODEFACTORY_H #include #include #include "node_terminal.h" #include "node_continuous.h" #include "node_categorical.h" #define NODEFACTORY_NODGBM_RESERVE ((unsigned long)101) using namespace std; class CNodeFactory { public: CNodeFactory(); ~CNodeFactory(); GBMRESULT Initialize(unsigned long cDepth); CNodeTerminal* GetNewNodeTerminal(); CNodeContinuous* GetNewNodeContinuous(); CNodeCategorical* GetNewNodeCategorical(); GBMRESULT RecycleNode(CNodeTerminal *pNode); GBMRESULT RecycleNode(CNodeContinuous *pNode); GBMRESULT RecycleNode(CNodeCategorical *pNode); private: stack TerminalStack; stack ContinuousStack; stack CategoricalStack; CNodeTerminal* pNodeTerminalTemp; CNodeContinuous* pNodeContinuousTemp; CNodeCategorical* pNodeCategoricalTemp; CNodeTerminal aBlockTerminal[NODEFACTORY_NODGBM_RESERVE]; CNodeContinuous aBlockContinuous[NODEFACTORY_NODGBM_RESERVE]; CNodeCategorical aBlockCategorical[NODEFACTORY_NODGBM_RESERVE]; }; #endif // NODEFACTORY_H gbm/src/gbm.cpp0000644000176000001440000001332713064145661013123 0ustar ripleyusers//------------------------------------------------------------------------------ // // GBM by Greg Ridgeway Copyright (C) 2003 // File: gbm.cpp // //------------------------------------------------------------------------------ #include #include "gbm.h" // Count the number of distinct groups in the input data int num_groups(const double* adMisc, int cTrain) { if (cTrain <= 0) { return 0; } double dLastGroup = adMisc[0]; int cGroups = 1; for(int i=1; 
iSetData(adX,aiXOrder,adY,adOffset,adWeight,adMisc, cRows,cCols,acVarClasses,alMonotoneVar); if(GBM_FAILED(hr)) { goto Error; } // set the distribution if(strncmp(pszFamily,"bernoulli",2) == 0) { pDist = new CBernoulli(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"gaussian",2) == 0) { pDist = new CGaussian(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"poisson",2) == 0) { pDist = new CPoisson(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"adaboost",2) == 0) { pDist = new CAdaBoost(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"coxph",2) == 0) { pDist = new CCoxPH(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"laplace",2) == 0) { pDist = new CLaplace(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"quantile",2) == 0) { pDist = new CQuantile(adMisc[0]); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"tdist",2) == 0) { pDist = new CTDist(adMisc[0]); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"multinomial",2) == 0) { pDist = new CMultinomial(cNumClasses, cRows); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strncmp(pszFamily,"huberized",2) == 0) { pDist = new CHuberized(); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strcmp(pszFamily,"pairwise_conc") == 0) { pDist = new CPairwise("conc"); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strcmp(pszFamily,"pairwise_ndcg") == 0) { pDist = new CPairwise("ndcg"); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strcmp(pszFamily,"pairwise_map") == 0) { pDist = new CPairwise("map"); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else if(strcmp(pszFamily,"pairwise_mrr") == 0) { pDist = new CPairwise("mrr"); if(pDist==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } } else { hr = GBM_INVALIDARG; goto Error; } if(pDist==NULL) { hr = GBM_INVALIDARG; goto Error; } if (!strncmp(pszFamily, "pairwise", strlen("pairwise"))) { cGroups = num_groups(adMisc, cTrain); } Cleanup: return hr; Error: goto Cleanup; } GBMRESULT gbm_transfer_to_R ( CGBM *pGBM, VEC_VEC_CATEGORIES &vecSplitCodes, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, int cCatSplitsOld ) { GBMRESULT hr = GBM_OK; hr = pGBM->TransferTreeToRList(aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld); if(GBM_FAILED(hr)) goto Error; Cleanup: return hr; Error: goto Cleanup; } GBMRESULT gbm_transfer_catsplits_to_R ( int iCatSplit, VEC_VEC_CATEGORIES &vecSplitCodes, int *aiSplitCodes ) { unsigned long i=0; for(i=0; i #include "node_nonterminal.h" class CNodeContinuous : public CNodeNonterminal { public: CNodeContinuous(); ~CNodeContinuous(); GBMRESULT PrintSubtree(unsigned long cIndent); GBMRESULT TransferTreeToRList(int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage); signed char WhichNode(CDataset *pData, unsigned long iObs); signed char WhichNode(double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow); GBMRESULT 
RecycleSelf(CNodeFactory *pNodeFactory); double dSplitValue; }; typedef CNodeContinuous *PCNodeContinuous; #endif // NODECONTINUOUS_H gbm/src/coxph.cpp0000644000176000001440000001375513064145661013504 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "coxph.h" CCoxPH::CCoxPH() { } CCoxPH::~CCoxPH() { } GBMRESULT CCoxPH::ComputeWorkingResponse ( double *adT, double *adDelta, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; double dF = 0.0; double dTot = 0.0; double dRiskTot = 0.0; vecdRiskTot.resize(nTrain); dRiskTot = 0.0; for(i=0; icN >= cMinObsInNode) { veciK2Node[K] = i; veciNode2K[i] = K; K++; } } vecdP.resize(K); matH.setactualsize(K-1); vecdG.resize(K-1); vecdG.assign(K-1,0.0); // zero the Hessian for(k=0; kcN >= cMinObsInNode)) { dF = adF[i] + ((adOffset==NULL) ? 0.0 : adOffset[i]); vecdP[veciNode2K[aiNodeAssign[i]]] += adW[i]*exp(dF); dRiskTot += adW[i]*exp(dF); if(adDelta[i]==1.0) { // compute g and H for(k=0; kdPrediction = 0.0; } for(m=0; mdPrediction = 0.0; break; } else { vecpTermNodes[veciK2Node[k]]->dPrediction -= dTemp*vecdG[m]; } } } // vecpTermNodes[veciK2Node[K-1]]->dPrediction = 0.0; // already set to 0.0 return hr; } double CCoxPH::BagImprovement ( double *adT, double *adDelta, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dNum = 0.0; double dDen = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; dNum = 0.0; dDen = 0.0; for(i=0; i= cRows) || (iCol >= cCols)) { hr = GBM_INVALIDARG; goto Error; } dValue = adX[iCol*cRows + iRow]; Cleanup: return hr; Error: goto Cleanup; } bool fHasOffset; double *adX; int *aiXOrder; double *adXTemp4Order; double *adY; double *adOffset; double *adWeight; double *adMisc; char **apszVarNames; int *acVarClasses; int *alMonotoneVar; int cRows; int cCols; private: }; #endif // DATASET_H gbm/src/dataset.cpp0000644000176000001440000000314113064145661013774 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "dataset.h" CDataset::CDataset() { fHasOffset = false; adX = NULL; aiXOrder = NULL; adXTemp4Order = NULL; adY = NULL; adOffset = NULL; adWeight = NULL; apszVarNames = NULL; cRows = 0; cCols = 0; } CDataset::~CDataset() { } GBMRESULT CDataset::ResetWeights() { GBMRESULT hr = GBM_OK; int i = 0; if(adWeight == NULL) { hr = GBM_INVALIDARG; goto Error; } for(i=0; icRows = cRows; this->cCols = cCols; this->adX = adX; this->aiXOrder = aiXOrder; this->adY = adY; this->adOffset = adOffset; this->adWeight = adWeight; this->acVarClasses = acVarClasses; this->alMonotoneVar = alMonotoneVar; if((adOffset != NULL) && !ISNA(*adOffset)) { this->adOffset = adOffset; fHasOffset = true; } else { this->adOffset = NULL; fHasOffset = false; } if((adMisc != NULL) && !ISNA(*adMisc)) { this->adMisc = adMisc; } else { this->adMisc = NULL; } Cleanup: return hr; Error: goto Cleanup; } gbm/src/node_categorical.cpp0000644000176000001440000001404113064145661015632 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "node_categorical.h" #include "node_factory.h" CNodeCategorical::CNodeCategorical() { aiLeftCategory = NULL; cLeftCategory = 0; } CNodeCategorical::~CNodeCategorical() { #ifdef NOISY_DEBUG Rprintf("categorical destructor\n"); #endif if(aiLeftCategory != NULL) { delete [] aiLeftCategory; aiLeftCategory = NULL; } } GBMRESULT CNodeCategorical::PrintSubtree ( unsigned long cIndent ) { GBMRESULT hr = GBM_OK; 
unsigned long i = 0; for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("N=%f, Improvement=%f, Prediction=%f, NA pred=%f\n", dTrainW, dImprovement, dPrediction, (pMissingNode == NULL ? 0.0 : pMissingNode->dPrediction)); for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("V%d in ",iSplitVar); for(i=0; iPrintSubtree(cIndent+1); for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("V%d not in ",iSplitVar); for(i=0; iPrintSubtree(cIndent+1); for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("missing\n"); hr = pMissingNode->PrintSubtree(cIndent+1); return hr; } signed char CNodeCategorical::WhichNode ( CDataset *pData, unsigned long iObs ) { signed char ReturnValue = 0; double dX = pData->adX[iSplitVar*(pData->cRows) + iObs]; if(!ISNA(dX)) { if(std::find(aiLeftCategory, aiLeftCategory+cLeftCategory, (ULONG)dX) != aiLeftCategory+cLeftCategory) { ReturnValue = -1; } else { ReturnValue = 1; } } // if missing value returns 0 return ReturnValue; } signed char CNodeCategorical::WhichNode ( double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow ) { signed char ReturnValue = 0; double dX = adX[iSplitVar*cRow + iRow]; if(!ISNA(dX)) { if(std::find(aiLeftCategory, aiLeftCategory+cLeftCategory, (ULONG)dX) != aiLeftCategory+cLeftCategory) { ReturnValue = -1; } else { ReturnValue = 1; } } // if missing value returns 0 return ReturnValue; } GBMRESULT CNodeCategorical::RecycleSelf ( CNodeFactory *pNodeFactory ) { GBMRESULT hr = GBM_OK; hr = pNodeFactory->RecycleNode(this); return hr; }; GBMRESULT CNodeCategorical::TransferTreeToRList ( int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage ) { GBMRESULT hr = GBM_OK; int iThisNodeID = iNodeID; unsigned long cCatSplits = vecSplitCodes.size(); unsigned long i = 0; int cLevels = pData->acVarClasses[iSplitVar]; aiSplitVar[iThisNodeID] = iSplitVar; adSplitPoint[iThisNodeID] = cCatSplits+cCatSplitsOld; // 0 based adErrorReduction[iThisNodeID] = dImprovement; adWeight[iThisNodeID] = dTrainW; adPred[iThisNodeID] = dShrinkage*dPrediction; vecSplitCodes.push_back(VEC_CATEGORIES()); vecSplitCodes[cCatSplits].resize(cLevels,1); for(i=0; iTransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); if(GBM_FAILED(hr)) goto Error; aiRightNode[iThisNodeID] = iNodeID; hr = pRightNode->TransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); if(GBM_FAILED(hr)) goto Error; aiMissingNode[iThisNodeID] = iNodeID; hr = pMissingNode->TransferTreeToRList(iNodeID, pData, aiSplitVar, adSplitPoint, aiLeftNode, aiRightNode, aiMissingNode, adErrorReduction, adWeight, adPred, vecSplitCodes, cCatSplitsOld, dShrinkage); if(GBM_FAILED(hr)) goto Error; Cleanup: return hr; Error: goto Cleanup; } gbm/src/coxph.h0000644000176000001440000000577713064145661013156 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: coxph.h // // License: GNU GPL (version 2 or later) // // Contents: Cox proportional hazard object // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // 
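// Background (standard Cox partial-likelihood result, stated here for
// orientation; notation is illustrative, and the exact indexing and
// weight handling are in ComputeWorkingResponse() in coxph.cpp): the
// negative gradient of the partial log-likelihood for observation i,
// with event indicator delta passed in via adDelta, has the form
//   z_i = delta_i - exp(F_i) * Sum_{j: t_j <= t_i} delta_j * w_j / W_j,
//   W_j = Sum_{k: t_k >= t_j} w_k * exp(F_k)  (weighted risk-set total),
// i.e. the usual martingale residual when F is the current log-hazard
// score; the running risk-set totals are kept in vecdRiskTot.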
//------------------------------------------------------------------------------ #ifndef COXPH_H #define COXPH_H #include "distribution.h" #include "matrix.h" class CCoxPH : public CDistribution { public: CCoxPH(); virtual ~CCoxPH(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adT, double *adDelta, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adT, double *adDelta, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adT, double *adDelta, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adT, double *adDelta, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adT, double *adDelta, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector vecdP; vector vecdRiskTot; vector vecdG; vector veciK2Node; vector veciNode2K; matrix matH; matrix matHinv; }; #endif // COXPH_H gbm/src/laplace.cpp0000644000176000001440000001027013064145661013751 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "laplace.h" CLaplace::CLaplace() { mpLocM = NULL; } CLaplace::~CLaplace() { if(mpLocM != NULL) { delete mpLocM; } } GBMRESULT CLaplace::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; if(adOffset == NULL) { for(i=0; i 0.0 ? 1.0 : -1.0; } } else { for(i=0; i 0.0 ? 1.0 : -1.0; } } return GBM_OK; } GBMRESULT CLaplace::InitF ( double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength ) { GBMRESULT hr = GBM_OK; double dOffset = 0.0; unsigned long ii = 0; int nLength = int(cLength); double *adArr = NULL; // Create a new LocationM object (for weighted medians) double *pTemp = NULL; mpLocM = new CLocationM("Other", 0, pTemp); if(mpLocM == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } adArr = new double[cLength]; if(adArr == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } for (ii = 0; ii < cLength; ii++) { dOffset = (adOffset==NULL) ? 
0.0 : adOffset[ii]; adArr[ii] = adY[ii] - dOffset; } dInitF = mpLocM->Median(nLength, adArr, adWeight); Cleanup: return hr; Error: goto Cleanup; } double CLaplace::Deviance ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff ) { unsigned long i=0; double dL = 0.0; double dW = 0.0; if(adOffset == NULL) { for(i=cIdxOff; icN >= cMinObsInNode) { iVecd = 0; for(iObs=0; iObsdPrediction = mpLocM->Median(iVecd, adArr, adW2); } } return hr; } double CLaplace::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; for(i=0; i #include "dataset.h" #include "node.h" using namespace std; class CNodeTerminal : public CNode { public: CNodeTerminal(); ~CNodeTerminal(); GBMRESULT Adjust(unsigned long cMinObsInNode); GBMRESULT PrintSubtree(unsigned long cIndent); GBMRESULT TransferTreeToRList(int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage); GBMRESULT ApplyShrinkage(double dLambda); GBMRESULT Predict(CDataset *pData, unsigned long i, double &dFadj); GBMRESULT Predict(double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow, double &dFadj); GBMRESULT GetVarRelativeInfluence(double *adRelInf); GBMRESULT RecycleSelf(CNodeFactory *pNodeFactory); }; typedef CNodeTerminal *PCNodeTerminal; typedef vector VEC_P_NODETERMINAL; #endif // NODETERMINAL_H gbm/src/node_nonterminal.cpp0000644000176000001440000000467513064145661015717 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "node_nonterminal.h" CNodeNonterminal::CNodeNonterminal() { pLeftNode = NULL; pRightNode = NULL; iSplitVar = 0; dImprovement = 0.0; pMissingNode = NULL; } CNodeNonterminal::~CNodeNonterminal() { } GBMRESULT CNodeNonterminal::Adjust ( unsigned long cMinObsInNode ) { GBMRESULT hr = GBM_OK; hr = pLeftNode->Adjust(cMinObsInNode); hr = pRightNode->Adjust(cMinObsInNode); if(pMissingNode->isTerminal && (pMissingNode->cN < cMinObsInNode)) { dPrediction = ((pLeftNode->dTrainW)*(pLeftNode->dPrediction) + (pRightNode->dTrainW)*(pRightNode->dPrediction))/ (pLeftNode->dTrainW + pRightNode->dTrainW); pMissingNode->dPrediction = dPrediction; } else { hr = pMissingNode->Adjust(cMinObsInNode); dPrediction = ((pLeftNode->dTrainW)* (pLeftNode->dPrediction) + (pRightNode->dTrainW)* (pRightNode->dPrediction) + (pMissingNode->dTrainW)*(pMissingNode->dPrediction))/ (pLeftNode->dTrainW + pRightNode->dTrainW + pMissingNode->dTrainW); } return hr; } GBMRESULT CNodeNonterminal::Predict ( CDataset *pData, unsigned long iRow, double &dFadj ) { GBMRESULT hr = GBM_OK; signed char schWhichNode = WhichNode(pData,iRow); if(schWhichNode == -1) { hr = pLeftNode->Predict(pData, iRow, dFadj); } else if(schWhichNode == 1) { hr = pRightNode->Predict(pData, iRow, dFadj); } else { hr = pMissingNode->Predict(pData, iRow, dFadj); } return hr; } GBMRESULT CNodeNonterminal::Predict ( double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow, double &dFadj ) { GBMRESULT hr = GBM_OK; signed char schWhichNode = WhichNode(adX,cRow,cCol,iRow); if(schWhichNode == -1) { hr = pLeftNode->Predict(adX,cRow,cCol,iRow,dFadj); } else if(schWhichNode == 1) { hr = 
pRightNode->Predict(adX,cRow,cCol,iRow,dFadj); } else { hr = pMissingNode->Predict(adX,cRow,cCol,iRow,dFadj); } return hr; } GBMRESULT CNodeNonterminal::GetVarRelativeInfluence ( double *adRelInf ) { GBMRESULT hr = GBM_OK; adRelInf[iSplitVar] += dImprovement; pLeftNode->GetVarRelativeInfluence(adRelInf); pRightNode->GetVarRelativeInfluence(adRelInf); return hr; } gbm/src/init.c0000644000176000001440000000217313064145661012756 0ustar ripleyusers#include #include #include // for NULL #include /* .Call calls */ extern SEXP gbm(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP gbm_plot(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP gbm_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP gbm_shrink_gradient(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); extern SEXP gbm_shrink_pred(SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP, SEXP); static const R_CallMethodDef CallEntries[] = { {"gbm", (DL_FUNC) &gbm, 22}, {"gbm_plot", (DL_FUNC) &gbm_plot, 10}, {"gbm_pred", (DL_FUNC) &gbm_pred, 10}, {"gbm_shrink_gradient", (DL_FUNC) &gbm_shrink_gradient, 11}, {"gbm_shrink_pred", (DL_FUNC) &gbm_shrink_pred, 10}, {NULL, NULL, 0} }; void R_init_gbm(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, NULL); R_useDynamicSymbols(dll, FALSE); } gbm/src/node_terminal.cpp0000644000176000001440000000432313064145661015172 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: node_terminal.cpp // //------------------------------------------------------------------------------ #include "node_terminal.h" #include "node_factory.h" CNodeTerminal::CNodeTerminal() { isTerminal = true; } CNodeTerminal::~CNodeTerminal() { #ifdef NOISY_DEBUG Rprintf("terminal destructor\n"); #endif } GBMRESULT CNodeTerminal::Adjust ( unsigned long cMinObsInNode ) { return GBM_OK; } GBMRESULT CNodeTerminal::ApplyShrinkage ( double dLambda ) { GBMRESULT hr = GBM_OK; dPrediction *= dLambda; return hr; } GBMRESULT CNodeTerminal::Predict ( CDataset *pData, unsigned long iRow, double &dFadj ) { dFadj = dPrediction; return GBM_OK; } GBMRESULT CNodeTerminal::Predict ( double *adX, unsigned long cRow, unsigned long cCol, unsigned long iRow, double &dFadj ) { dFadj = dPrediction; return GBM_OK; } GBMRESULT CNodeTerminal::PrintSubtree ( unsigned long cIndent ) { unsigned long i = 0; for(i=0; i< cIndent; i++) Rprintf(" "); Rprintf("N=%f, Prediction=%f *\n", dTrainW, dPrediction); return GBM_OK; } GBMRESULT CNodeTerminal::GetVarRelativeInfluence ( double *adRelInf ) { return GBM_OK; } GBMRESULT CNodeTerminal::RecycleSelf ( CNodeFactory *pNodeFactory ) { pNodeFactory->RecycleNode(this); return GBM_OK; }; GBMRESULT CNodeTerminal::TransferTreeToRList ( int &iNodeID, CDataset *pData, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, VEC_VEC_CATEGORIES &vecSplitCodes, int cCatSplitsOld, double dShrinkage ) { GBMRESULT hr = GBM_OK; aiSplitVar[iNodeID] = -1; adSplitPoint[iNodeID] = dShrinkage*dPrediction; aiLeftNode[iNodeID] = -1; aiRightNode[iNodeID] = -1; aiMissingNode[iNodeID] = -1; adErrorReduction[iNodeID] = 0.0; adWeight[iNodeID] = dTrainW; adPred[iNodeID] = dShrinkage*dPrediction; iNodeID++; return hr; } gbm/src/gbmentry.cpp0000644000176000001440000011225313064145661014203 
0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "gbm.h" extern "C" { #include #include SEXP gbm ( SEXP radY, // outcome or response SEXP radOffset, // offset for f(x), NA for no offset SEXP radX, SEXP raiXOrder, SEXP radWeight, SEXP radMisc, // other row specific data (eg failure time), NA=no Misc SEXP rcRows, SEXP rcCols, SEXP racVarClasses, SEXP ralMonotoneVar, SEXP rszFamily, SEXP rcTrees, SEXP rcDepth, // interaction depth SEXP rcMinObsInNode, SEXP rcNumClasses, SEXP rdShrinkage, SEXP rdBagFraction, SEXP rcTrain, SEXP radFOld, SEXP rcCatSplitsOld, SEXP rcTreesOld, SEXP rfVerbose ) { unsigned long hr = 0; SEXP rAns = NULL; SEXP rNewTree = NULL; SEXP riSplitVar = NULL; SEXP rdSplitPoint = NULL; SEXP riLeftNode = NULL; SEXP riRightNode = NULL; SEXP riMissingNode = NULL; SEXP rdErrorReduction = NULL; SEXP rdWeight = NULL; SEXP rdPred = NULL; SEXP rdInitF = NULL; SEXP radF = NULL; SEXP radTrainError = NULL; SEXP radValidError = NULL; SEXP radOOBagImprove = NULL; SEXP rSetOfTrees = NULL; SEXP rSetSplitCodes = NULL; SEXP rSplitCode = NULL; VEC_VEC_CATEGORIES vecSplitCodes; int i = 0; int iT = 0; int iK = 0; int cTrees = INTEGER(rcTrees)[0]; const int cResultComponents = 7; // rdInitF, radF, radTrainError, radValidError, radOOBagImprove // rSetOfTrees, rSetSplitCodes const int cTreeComponents = 8; // riSplitVar, rdSplitPoint, riLeftNode, // riRightNode, riMissingNode, rdErrorReduction, rdWeight, rdPred int cNodes = 0; int cTrain = INTEGER(rcTrain)[0]; int cNumClasses = INTEGER(rcNumClasses)[0]; double dTrainError = 0.0; double dValidError = 0.0; double dOOBagImprove = 0.0; CGBM *pGBM = NULL; CDataset *pData = NULL; CDistribution *pDist = NULL; int cGroups = -1; // set up the dataset pData = new CDataset(); if(pData==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // initialize R's random number generator GetRNGstate(); // initialize some things hr = gbm_setup(REAL(radY), REAL(radOffset), REAL(radX), INTEGER(raiXOrder), REAL(radWeight), REAL(radMisc), INTEGER(rcRows)[0], INTEGER(rcCols)[0], INTEGER(racVarClasses), INTEGER(ralMonotoneVar), CHAR(STRING_ELT(rszFamily,0)), INTEGER(rcTrees)[0], INTEGER(rcDepth)[0], INTEGER(rcMinObsInNode)[0], INTEGER(rcNumClasses)[0], REAL(rdShrinkage)[0], REAL(rdBagFraction)[0], INTEGER(rcTrain)[0], pData, pDist, cGroups); if(GBM_FAILED(hr)) { goto Error; } // allocate the GBM pGBM = new CGBM(); if(pGBM==NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // initialize the GBM hr = pGBM->Initialize(pData, pDist, REAL(rdShrinkage)[0], cTrain, REAL(rdBagFraction)[0], INTEGER(rcDepth)[0], INTEGER(rcMinObsInNode)[0], INTEGER(rcNumClasses)[0], cGroups); if(GBM_FAILED(hr)) { goto Error; } // allocate the main return object PROTECT(rAns = allocVector(VECSXP, cResultComponents)); // allocate the initial value PROTECT(rdInitF = allocVector(REALSXP, 1)); SET_VECTOR_ELT(rAns,0,rdInitF); UNPROTECT(1); // rdInitF // allocate the predictions PROTECT(radF = allocVector(REALSXP, (pData->cRows) * cNumClasses)); SET_VECTOR_ELT(rAns,1,radF); UNPROTECT(1); // radF hr = pDist->Initialize(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, pData->cRows); if(ISNA(REAL(radFOld)[0])) // check for old predictions { // set the initial value of F as a constant hr = pDist->InitF(pData->adY, pData->adMisc, pData->adOffset, pData->adWeight, REAL(rdInitF)[0], cTrain); for(i=0; i < (pData->cRows) * cNumClasses; i++) { REAL(radF)[i] = REAL(rdInitF)[0]; } } else { for(i=0; i < (pData->cRows) * cNumClasses; i++) { REAL(radF)[i] = REAL(radFOld)[i]; } } // allocate space for the 
performance measures PROTECT(radTrainError = allocVector(REALSXP, cTrees)); PROTECT(radValidError = allocVector(REALSXP, cTrees)); PROTECT(radOOBagImprove = allocVector(REALSXP, cTrees)); SET_VECTOR_ELT(rAns,2,radTrainError); SET_VECTOR_ELT(rAns,3,radValidError); SET_VECTOR_ELT(rAns,4,radOOBagImprove); UNPROTECT(3); // radTrainError , radValidError, radOOBagImprove // allocate the component for the tree structures PROTECT(rSetOfTrees = allocVector(VECSXP, cTrees * cNumClasses)); SET_VECTOR_ELT(rAns,5,rSetOfTrees); UNPROTECT(1); // rSetOfTrees if(INTEGER(rfVerbose)[0]) { Rprintf("Iter TrainDeviance ValidDeviance StepSize Improve\n"); } for(iT=0; iTUpdateParams(REAL(radF), pData->adOffset, pData->adWeight, cTrain); if(GBM_FAILED(hr)) { goto Error; } REAL(radTrainError)[iT] = 0.0; REAL(radValidError)[iT] = 0.0; REAL(radOOBagImprove)[iT] = 0.0; for (iK = 0; iK < cNumClasses; iK++) { hr = pGBM->iterate(REAL(radF), dTrainError,dValidError,dOOBagImprove, cNodes, cNumClasses, iK); if(GBM_FAILED(hr)) { goto Error; } // store the performance measures REAL(radTrainError)[iT] += dTrainError; REAL(radValidError)[iT] += dValidError; REAL(radOOBagImprove)[iT] += dOOBagImprove; // allocate the new tree component for the R list structure PROTECT(rNewTree = allocVector(VECSXP, cTreeComponents)); // riNodeID,riSplitVar,rdSplitPoint,riLeftNode, // riRightNode,riMissingNode,rdErrorReduction,rdWeight PROTECT(riSplitVar = allocVector(INTSXP, cNodes)); PROTECT(rdSplitPoint = allocVector(REALSXP, cNodes)); PROTECT(riLeftNode = allocVector(INTSXP, cNodes)); PROTECT(riRightNode = allocVector(INTSXP, cNodes)); PROTECT(riMissingNode = allocVector(INTSXP, cNodes)); PROTECT(rdErrorReduction = allocVector(REALSXP, cNodes)); PROTECT(rdWeight = allocVector(REALSXP, cNodes)); PROTECT(rdPred = allocVector(REALSXP, cNodes)); SET_VECTOR_ELT(rNewTree,0,riSplitVar); SET_VECTOR_ELT(rNewTree,1,rdSplitPoint); SET_VECTOR_ELT(rNewTree,2,riLeftNode); SET_VECTOR_ELT(rNewTree,3,riRightNode); SET_VECTOR_ELT(rNewTree,4,riMissingNode); SET_VECTOR_ELT(rNewTree,5,rdErrorReduction); SET_VECTOR_ELT(rNewTree,6,rdWeight); SET_VECTOR_ELT(rNewTree,7,rdPred); UNPROTECT(cTreeComponents); SET_VECTOR_ELT(rSetOfTrees,(iK + iT * cNumClasses),rNewTree); UNPROTECT(1); // rNewTree hr = gbm_transfer_to_R(pGBM, vecSplitCodes, INTEGER(riSplitVar), REAL(rdSplitPoint), INTEGER(riLeftNode), INTEGER(riRightNode), INTEGER(riMissingNode), REAL(rdErrorReduction), REAL(rdWeight), REAL(rdPred), INTEGER(rcCatSplitsOld)[0]); } // Close for iK // print the information if((iT <= 9) || ((iT+1+INTEGER(rcTreesOld)[0])/20 == (iT+1+INTEGER(rcTreesOld)[0])/20.0) || (iT==cTrees-1)) { R_CheckUserInterrupt(); if(INTEGER(rfVerbose)[0]) { Rprintf("%6d %13.4f %15.4f %10.4f %9.4f\n", iT+1+INTEGER(rcTreesOld)[0], REAL(radTrainError)[iT], REAL(radValidError)[iT], REAL(rdShrinkage)[0], REAL(radOOBagImprove)[iT]); } } } if(INTEGER(rfVerbose)[0]) Rprintf("\n"); // transfer categorical splits to R PROTECT(rSetSplitCodes = allocVector(VECSXP, vecSplitCodes.size())); SET_VECTOR_ELT(rAns,6,rSetSplitCodes); UNPROTECT(1); // rSetSplitCodes for(i=0; i<(int)vecSplitCodes.size(); i++) { PROTECT(rSplitCode = allocVector(INTSXP, size_of_vector(vecSplitCodes,i))); SET_VECTOR_ELT(rSetSplitCodes,i,rSplitCode); UNPROTECT(1); // rSplitCode hr = gbm_transfer_catsplits_to_R(i, vecSplitCodes, INTEGER(rSplitCode)); } // dump random number generator seed #ifdef NOISY_DEBUG Rprintf("PutRNGstate\n"); #endif PutRNGstate(); Cleanup: UNPROTECT(1); // rAns #ifdef NOISY_DEBUG Rprintf("destructing\n"); #endif if(pGBM 
!= NULL) { delete pGBM; pGBM = NULL; } if(pDist != NULL) { delete pDist; pDist = NULL; } if(pData != NULL) { delete pData; pData = NULL; } return rAns; Error: goto Cleanup; } SEXP gbm_pred ( SEXP radX, // the data matrix SEXP rcRows, // number of rows SEXP rcCols, // number of columns SEXP rcNumClasses, // number of classes SEXP rcTrees, // number of trees, may be a vector SEXP rdInitF, // the initial value SEXP rTrees, // the list of trees SEXP rCSplits, // the list of categorical splits SEXP raiVarType, // indicator of continuous/nominal SEXP riSingleTree // boolean whether to return only results for one tree ) { unsigned long hr = 0; int iTree = 0; int iObs = 0; int cRows = INTEGER(rcRows)[0]; int cPredIterations = LENGTH(rcTrees); int iPredIteration = 0; int cTrees = 0; int iClass = 0; int cNumClasses = INTEGER(rcNumClasses)[0]; SEXP rThisTree = NULL; int *aiSplitVar = NULL; double *adSplitCode = NULL; int *aiLeftNode = NULL; int *aiRightNode = NULL; int *aiMissingNode = NULL; int iCurrentNode = 0; double dX = 0.0; int iCatSplitIndicator = 0; bool fSingleTree = (INTEGER(riSingleTree)[0]==1); SEXP radPredF = NULL; // allocate the predictions to return PROTECT(radPredF = allocVector(REALSXP, cRows*cNumClasses*cPredIterations)); if(radPredF == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } // initialize the predicted values if(!fSingleTree) { // initialize with the intercept for only the smallest rcTrees for(iObs=0; iObs0)) { // copy over from the last rcTrees for(iObs=0; iObs 0) { cStackNodes--; iCurrentNode = aiNodeStack[cStackNodes]; if(aiSplitVar[iCurrentNode] == -1) // terminal node { REAL(radPredF)[iClass*cRows + iObs] += adWeightStack[cStackNodes]*adSplitCode[iCurrentNode]; } else // non-terminal node { // is this a split variable that interests me? iPredVar = -1; for(i=0; (iPredVar == -1) && (i < cCols); i++) { if(INTEGER(raiWhichVar)[i] == aiSplitVar[iCurrentNode]) { iPredVar = i; // split is on one that interests me } } if(iPredVar != -1) // this split is among raiWhichVar { dX = REAL(radX)[iPredVar*cRows + iObs]; // missing? if(ISNA(dX)) { aiNodeStack[cStackNodes] = aiMissingNode[iCurrentNode]; cStackNodes++; } // continuous? 
else if(INTEGER(raiVarType)[aiSplitVar[iCurrentNode]] == 0) { if(dX < adSplitCode[iCurrentNode]) { aiNodeStack[cStackNodes] = aiLeftNode[iCurrentNode]; cStackNodes++; } else { aiNodeStack[cStackNodes] = aiRightNode[iCurrentNode]; cStackNodes++; } } else // categorical { iCatSplitIndicator = INTEGER( VECTOR_ELT(rCSplits, (int)adSplitCode[iCurrentNode]))[(int)dX]; if(iCatSplitIndicator==-1) { aiNodeStack[cStackNodes] = aiLeftNode[iCurrentNode]; cStackNodes++; } else if(iCatSplitIndicator==1) { aiNodeStack[cStackNodes] = aiRightNode[iCurrentNode]; cStackNodes++; } else // handle unused level { iCurrentNode = aiMissingNode[iCurrentNode]; } } } // iPredVar != -1 else // not interested in this split, average left and right { aiNodeStack[cStackNodes] = aiRightNode[iCurrentNode]; dCurrentW = adWeightStack[cStackNodes]; adWeightStack[cStackNodes] = dCurrentW * adW[aiRightNode[iCurrentNode]]/ (adW[aiLeftNode[iCurrentNode]]+ adW[aiRightNode[iCurrentNode]]); cStackNodes++; aiNodeStack[cStackNodes] = aiLeftNode[iCurrentNode]; adWeightStack[cStackNodes] = dCurrentW-adWeightStack[cStackNodes-1]; cStackNodes++; } } // non-terminal node } // while(cStackNodes > 0) } // iObs } // iClass } // iTree Cleanup: UNPROTECT(1); // radPredF return radPredF; Error: goto Cleanup; } // gbm_plot SEXP gbm_shrink_pred ( SEXP radX, SEXP rcRows, SEXP rcCols, SEXP rcNumClasses, SEXP racTrees, SEXP rdInitF, SEXP rTrees, SEXP rCSplits, SEXP raiVarType, SEXP rcInteractionDepth, SEXP radLambda ) { unsigned long hr = 0; int iTree = 0; int iPredictionIter = 0; int iObs = 0; int iClass = 0; int i = 0; int cRows = INTEGER(rcRows)[0]; int cNumClasses = INTEGER(rcNumClasses)[0]; double *adLambda = REAL(radLambda); double dLambda = 0.0; double dPred = 0.0; SEXP rThisTree = NULL; int *aiSplitVar = NULL; double *adSplitCode = NULL; int *aiLeftNode = NULL; int *aiRightNode = NULL; int *aiMissingNode = NULL; double *adNodeW = NULL; int iCurrentNode = 0; double dX = 0.0; int iCatSplitIndicator = 0; SEXP rResult = NULL; SEXP radPredF = NULL; // The predictions double *adPredF = NULL; // The shrunken predictions double *adNodePred = NULL; int *aiNodeStack = NULL; unsigned long cNodeStack = 0; int cMaxNodes = 1+3*(INTEGER(rcInteractionDepth)[0]); adPredF = new double[cRows * cNumClasses]; if(adPredF == NULL) { hr = GBM_OUTOFMEMORY; goto Error; } for(iObs=0; iObs0) { i = aiNodeStack[cNodeStack-1]; if(aiSplitVar[i]==-1) { adNodePred[i] = adSplitCode[i]; cNodeStack--; } else if(ISNA(adNodePred[aiLeftNode[i]])) { aiNodeStack[cNodeStack] = aiLeftNode[i]; cNodeStack++; aiNodeStack[cNodeStack] = aiRightNode[i]; cNodeStack++; // check whether missing node is the same as parent node // occurs when X_i has no missing values if(adNodeW[i] != adNodeW[aiMissingNode[i]]) { aiNodeStack[cNodeStack] = aiMissingNode[i]; cNodeStack++; } else { adNodePred[aiMissingNode[i]] = 0.0; } } else { // compute the parent node's prediction adNodePred[i] = (adNodeW[aiLeftNode[i]]*adNodePred[aiLeftNode[i]] + adNodeW[aiRightNode[i]]*adNodePred[aiRightNode[i]]+ adNodeW[aiMissingNode[i]]*adNodePred[aiMissingNode[i]])/ adNodeW[i]; cNodeStack--; } } // predict for the observations for(iObs=0; iObs 1) { adProb = new double[cNumClasses]; } // initialize the predicted values for(iObs=0; iObs 1) then calculate the probabilities if (cNumClasses > 1) { dDenom = 0.0; for (iClass = 0; iClass < cNumClasses; iClass++) { adProb[iClass] = exp(REAL(radPredF)[iObs + iClass * cRows]); dDenom += adProb[iClass]; } dDJDf = 0.0; for (iClass = 0; iClass < cNumClasses; iClass++) { adProb[iClass] /= 
dDenom; REAL(rdObjective)[0] += (adY[iObs + iClass * cRows] - adProb[iClass]) * (adY[iObs + iClass * cRows] - adProb[iClass]); dDJDf += -2*(adY[iObs + iClass * cRows] - adProb[iClass]); } REAL(rdObjective)[0] /= double(cNumClasses); dDJDf /= double(cNumClasses); } else { // DEBUG: need to make more general for other loss functions! REAL(rdObjective)[0] += (adY[iObs]-REAL(radPredF)[iObs])* (adY[iObs]-REAL(radPredF)[iObs]); dDJDf = -2*(adY[iObs]-REAL(radPredF)[iObs]); } for(iLambda=0; iLambda #include "distribution.h" #include "locationm.h" class CTDist : public CDistribution { public: CTDist(double adNu); virtual ~CTDist(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: double mdNu; CLocationM *mpLocM; }; #endif // TDISTCGBM_H gbm/src/poisson.h0000644000176000001440000000567413064145661013523 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // File: poisson.h // // License: GNU GPL (version 2 or later) // // Contents: poisson object // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef POISSON_H #define POISSON_H #include #include "distribution.h" class CPoisson : public CDistribution { public: CPoisson(); virtual ~CPoisson(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adZ, bool *afInBag, unsigned long nTrain, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector vecdNum; vector vecdDen; vector vecdMax; vector vecdMin; }; #endif // POISSON_H gbm/src/gbm.h0000644000176000001440000000355613064145661012573 0ustar 
ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: gbm.h // // License: GNU GPL (version 2 or later) // // Contents: Entry point for gbm.dll // // Owner: gregr@rand.org // // History: 2/14/2003 gregr created // 6/11/2007 gregr added quantile regression // written by Brian Kriegler // //------------------------------------------------------------------------------ #include <vector> #include "dataset.h" #include "distribution.h" #include "bernoulli.h" #include "adaboost.h" #include "poisson.h" #include "gaussian.h" #include "coxph.h" #include "laplace.h" #include "quantile.h" #include "tdist.h" #include "multinomial.h" #include "pairwise.h" #include "gbm_engine.h" #include "locationm.h" #include "huberized.h" typedef vector<int> VEC_CATEGORIES; typedef vector<VEC_CATEGORIES> VEC_VEC_CATEGORIES; GBMRESULT gbm_setup ( double *adY, double *adOffset, double *adX, int *aiXOrder, double *adWeight, double *adMisc, int cRows, int cCols, int *acVarClasses, int *alMonotoneVar, const char *pszFamily, int cTrees, int cLeaves, int cMinObsInNode, int cNumClasses, double dShrinkage, double dBagFraction, int cTrain, CDataset *pData, PCDistribution &pDist, int& cGroups ); GBMRESULT gbm_transfer_to_R ( CGBM *pGBM, VEC_VEC_CATEGORIES &vecSplitCodes, int *aiSplitVar, double *adSplitPoint, int *aiLeftNode, int *aiRightNode, int *aiMissingNode, double *adErrorReduction, double *adWeight, double *adPred, int cCatSplitsOld ); GBMRESULT gbm_transfer_catsplits_to_R ( int iCatSplit, VEC_VEC_CATEGORIES &vecSplitCodes, int *aiSplitCodes ); int size_of_vector ( VEC_VEC_CATEGORIES &vec, int i );
gbm/src/bernoulli.h0000644000176000001440000000555313064145661014010 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: bernoulli.h // // License: GNU GPL (version 2 or later) // // Contents: bernoulli object // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef BERNOULLI_H #define BERNOULLI_H #include "distribution.h" #include "buildinfo.h" class CBernoulli : public CDistribution { public: CBernoulli(); virtual ~CBernoulli(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector<double> vecdNum; vector<double> vecdDen; }; #endif // BERNOULLI_H
gbm/src/gaussian.cpp0000644000176000001440000000643113064145661014166 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003
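// Explanatory note (added): for squared-error loss L(y,f) = (y - f)^2, the
// working response (the negative gradient direction) is simply the residual
// y - f. ComputeWorkingResponse below fills adZ with exactly that residual,
// subtracting the offset from the score first whenever one is supplied.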
#include "gaussian.h" CGaussian::CGaussian() { } CGaussian::~CGaussian() { } GBMRESULT CGaussian::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { GBMRESULT hr = GBM_OK; unsigned long i = 0; if((adY == NULL) || (adF == NULL) || (adZ == NULL) || (adWeight == NULL)) { hr = GBM_INVALIDARG; goto Error; } if(adOffset == NULL) { for(i=0; idPrediction = 0.0; } else { vecpTermNodes[iNode]->dPrediction = vecdNum[iNode]/vecdDen[iNode]; } } } return hr; } double CHuberized::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long i = 0; for(i=0; i 0) ? dClassSum : 1e-8; for (kk = 0; kk < mcNumClasses; kk++) { madProb[ii + kk * mcRows] /= dClassSum; } } return GBM_OK; } GBMRESULT CMultinomial::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; for(i=cIdxOff; icN >= cMinObsInNode) { // Get the number of nodes here double dNum = 0.0; double dDenom = 0.0; for (iObs = 0; iObs < nTrain; iObs++) { if(afInBag[iObs] && (aiNodeAssign[iObs] == iNode)) { int iIdx = iObs + cIdxOff; dNum += adW[iIdx] * adZ[iIdx]; dDenom += adW[iIdx] * fabs(adZ[iIdx]) * (1 - fabs(adZ[iIdx])); } } dDenom = (dDenom > 0) ? dDenom : 1e-8; vecpTermNodes[iNode]->dPrediction = dNum / dDenom; } } return hr; } double CMultinomial::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dW = 0.0; unsigned long ii; unsigned long kk; // Calculate the probabilities after the step double *adStepProb = new double[mcNumClasses * mcRows]; // Assume that this is last class - calculate new prob as in updateParams but // using (F_ik + ss*Fadj_ik) instead of F_ik. Then calculate OOB improve for (ii = 0; ii < mcRows; ii++) { double dClassSum = 0.0; for (kk = 0; kk < mcNumClasses; kk++) { int iIdx = ii + kk * mcRows; double dF = (adOffset == NULL) ? adF[iIdx] : adF[iIdx] + adOffset[iIdx]; dF += dStepSize * adFadj[iIdx]; adStepProb[iIdx] = adWeight[iIdx] * exp(dF); dClassSum += adWeight[iIdx] * exp(dF); } dClassSum = (dClassSum > 0) ? 
dClassSum : 1e-8; for (kk = 0; kk < mcNumClasses; kk++) { adStepProb[ii + kk * mcRows] /= dClassSum; } } // Calculate the improvement for(ii=0; ii #include "distribution.h" class CQuantile: public CDistribution { public: CQuantile(double dAlpha); virtual ~CQuantile(); GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; }; GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff); GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength); GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adW, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long nTrain, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff); double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff); double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain); private: vector<double> vecd; double dAlpha; }; #endif // QUANTILE_H
gbm/src/distribution.h0000644000176000001440000001312113064145661014532 0ustar ripleyusers//------------------------------------------------------------------------------ // GBM by Greg Ridgeway Copyright (C) 2003 // // File: distribution.h // // License: GNU GPL (version 2 or later) // // Contents: distribution object // // Owner: gregr@rand.org // // History: 3/26/2001 gregr created // 2/14/2003 gregr: adapted for R implementation // //------------------------------------------------------------------------------ #ifndef DISTRIBUTION_H #define DISTRIBUTION_H #include "node_terminal.h" class CDistribution { public: CDistribution(); virtual ~CDistribution(); // In the subsequent functions, parameters have the following meaning: // * adY - The target // * adMisc - Optional auxiliary data (the precise meaning is specific to the // derived class) // * adOffset - An optional offset to the score (adF) // * adWeight - Instance training weight // * adF - Current score (sum of all trees generated so far) // * adZ - (Negative) gradient of loss function, to be predicted by tree // * adFadj - Output of current tree, to be added to adF // * cLength - Number of instances (size of vectors) // * afInBag - true if instance is part of training set for current tree // (depends on random subsampling) // * cIdxOff - Offset used for multi-class training (CMultinomial). // Initialize() is called once, before training starts. // It gives derived classes a chance for custom preparations, e.g., to allocate // memory or to pre-compute values that do not change between iterations. virtual GBMRESULT Initialize(double *adY, double *adMisc, double *adOffset, double *adWeight, unsigned long cLength) { return GBM_OK; } // UpdateParams() is called at the start of each iteration. // CMultinomial uses it to normalize predictions across multiple classes. virtual GBMRESULT UpdateParams(double *adF, double *adOffset, double *adWeight, unsigned long cLength) = 0; // ComputeWorkingResponse() calculates the negative gradients of the // loss function, and stores them in adZ.
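// Concrete examples (added, taken from the implementations in this package):
// CGaussian (gaussian.cpp) sets adZ[i] = adY[i] - adF[i], the residual, while
// CBernoulli sets adZ[i] = adY[i] - 1/(1 + exp(-adF[i])), i.e., the response
// minus the predicted probability; any offset is added to adF[i] beforehand.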
virtual GBMRESULT ComputeWorkingResponse(double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long cLength, int cIdxOff) = 0; // InitF() computes the best constant prediction for all instances, and // stores it in dInitF. virtual GBMRESULT InitF(double *adY, double *adMisc, double *adOffset, double *adWeight, double &dInitF, unsigned long cLength) = 0; // Deviance() returns the value of the loss function, based on the // current predictions (adF). virtual double Deviance(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff) = 0; // FitBestConstant() calculates and sets prediction values for all terminal nodes // of the tree being currently constructed. // Assumptions: // * cTermNodes is the number of terminal nodes of the tree. // * vecpTermNodes is a vector of (pointers to) the terminal nodes of the tree, of // size cTermNodes. // * aiNodeAssign is a vector of size cLength, that maps each instance to an index // into vecpTermNodes for the corresponding terminal node. virtual GBMRESULT FitBestConstant(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adZ, unsigned long *aiNodeAssign, unsigned long cLength, VEC_P_NODETERMINAL vecpTermNodes, unsigned long cTermNodes, unsigned long cMinObsInNode, bool *afInBag, double *adFadj, int cIdxOff) = 0; // BagImprovement() returns the incremental difference in the loss // function induced by scoring with (adF + dStepSize * adFAdj) instead of adF, for // all instances that were not part of the training set for the current tree (i.e., // afInBag set to false). virtual double BagImprovement(double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long cLength) = 0; }; typedef CDistribution *PCDistribution; #endif // DISTRIBUTION_H gbm/src/tdist.cpp0000644000176000001440000001055713064145661013507 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 #include "tdist.h" CTDist::CTDist(double adNu) { mdNu = adNu; double *adParams = new double[1]; adParams[0] = adNu; mpLocM = new CLocationM("tdist", 1, adParams); delete[] adParams; } CTDist::~CTDist() { delete mpLocM; } GBMRESULT CTDist::ComputeWorkingResponse ( double *adY, double *adMisc, double *adOffset, double *adF, double *adZ, double *adWeight, bool *afInBag, unsigned long nTrain, int cIdxOff ) { unsigned long i = 0; double dU = 0.0; if(adOffset == NULL) { for(i=0; iLocationM(iN, adArr, adWeight); delete[] adArr; return GBM_OK; } double CTDist::Deviance ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, unsigned long cLength, int cIdxOff ) { unsigned long i=0; double dL = 0.0; double dW = 0.0; double dU = 0.0; if(adOffset == NULL) { for(i=cIdxOff; icN >= cMinObsInNode) { // Get the number of nodes here int iNumNodes = 0; for (iObs = 0; iObs < nTrain; iObs++) { if(afInBag[iObs] && (aiNodeAssign[iObs] == iNode)) { iNumNodes++; } } // Create the arrays to centre double *adArr = new double[iNumNodes]; double *adWeight = new double[iNumNodes]; int iIdx = 0; for(iObs=0; iObsdPrediction = mpLocM->LocationM(iNumNodes, adArr, adWeight); delete[] adArr; delete[] adWeight; } } return hr; } double CTDist::BagImprovement ( double *adY, double *adMisc, double *adOffset, double *adWeight, double *adF, double *adFadj, bool *afInBag, double dStepSize, unsigned long nTrain ) { double dReturnValue = 0.0; double dF = 0.0; double dW = 0.0; unsigned long 
i = 0; double dU = 0.0; double dV = 0.0; for(i=0; i // generic object (class) definition of matrix: template class matrix{ // NOTE: maxsize determines available memory storage, but // actualsize determines the actual size of the stored matrix in use // at a particular time. int maxsize; // max number of rows (same as max number of columns) int actualsize; // actual size (rows, or columns) of the stored matrix D* data; // where the data contents of the matrix are stored void allocateD() { delete[] data; data = new D [maxsize*maxsize]; }; public: matrix() { maxsize = 5; actualsize = 5; data = 0; allocateD(); }; // private ctor's matrix(int newmaxsize) {matrix(newmaxsize,newmaxsize);}; matrix(int newmaxsize, int newactualsize) { // the only public ctor if (newmaxsize <= 0) newmaxsize = 5; maxsize = newmaxsize; if ((newactualsize <= newmaxsize)&&(newactualsize>0)) actualsize = newactualsize; else actualsize = newmaxsize; // since allocateD() will first call delete[] on data: data = 0; allocateD(); }; ~matrix() { delete[] data; }; void dumpMatrixValues() { bool xyz; double rv; for (int i=0; i < actualsize; i++) { cout << "i=" << i << ": "; for (int j=0; j maxunitydeviation ) { maxunitydeviation = currentunitydeviation; worstdiagonal = i; } } int worstoffdiagonalrow = 0; int worstoffdiagonalcolumn = 0; D maxzerodeviation = 0.0; D currentzerodeviation ; for ( i = 0; i < actualsize; i++ ) { for ( int j = 0; j < actualsize; j++ ) { if ( i == j ) continue; // we look only at non-diagonal terms currentzerodeviation = data[i*maxsize+j]; if ( currentzerodeviation < 0.0) currentzerodeviation *= -1.0; if ( currentzerodeviation > maxzerodeviation ) { maxzerodeviation = currentzerodeviation; worstoffdiagonalrow = i; worstoffdiagonalcolumn = j; } } } cout << "Worst diagonal value deviation from unity: " << maxunitydeviation << " at row/column " << worstdiagonal << endl; cout << "Worst off-diagonal value deviation from zero: " << maxzerodeviation << " at row = " << worstoffdiagonalrow << ", column = " << worstoffdiagonalcolumn << endl; } void settoproduct(matrix& left, matrix& right) { actualsize = left.getactualsize(); if ( maxsize < left.getactualsize() ) { maxsize = left.getactualsize(); allocateD(); } for ( int i = 0; i < actualsize; i++ ) { for ( int j = 0; j < actualsize; j++ ) { D sum = 0.0; D leftvalue, rightvalue; bool success; for (int c = 0; c < actualsize; c++) { left.getvalue(i,c,leftvalue,success); right.getvalue(c,j,rightvalue,success); sum += leftvalue * rightvalue; } setvalue(i,j,sum); } } } void copymatrix(matrix& source) { actualsize = source.getactualsize(); if ( maxsize < source.getactualsize() ) { maxsize = source.getactualsize(); allocateD(); } for ( int i = 0; i < actualsize; i++ ) { for ( int j = 0; j < actualsize; j++ ) { D value; bool success; source.getvalue(i,j,value,success); data[i*maxsize+j] = value; } } }; void setactualsize(int newactualsize) { if ( newactualsize > maxsize ) { maxsize = newactualsize ; // * 2; // wastes memory but saves // time otherwise required for // operation new[] allocateD(); } if (newactualsize >= 0) actualsize = newactualsize; }; int getactualsize() { return actualsize; }; void getvalue(int row, int column, D& returnvalue, bool& success) { if ( (row>=maxsize) || (column>=maxsize) || (row<0) || (column<0) ) { success = false; return; } returnvalue = data[ row * maxsize + column ]; success = true; }; bool setvalue(int row, int column, D newvalue) { if ( (row >= maxsize) || (column >= maxsize) || (row<0) || (column<0) ) return false; data[ row * maxsize 
+ column ] = newvalue; return true; }; void invert() { int i = 0; int j = 0; int k = 0; if (actualsize <= 0) return; // sanity check if (actualsize == 1) { data[0] = 1.0/data[0]; return; } for (i=1; i < actualsize; i++) data[i] /= data[0]; // normalize row 0 for (i=1; i < actualsize; i++) { for ( j=i; j < actualsize; j++) { // do a column of L D sum = 0.0; for ( k = 0; k < i; k++) sum += data[j*maxsize+k] * data[k*maxsize+i]; data[j*maxsize+i] -= sum; } if (i == actualsize-1) continue; for ( j=i+1; j < actualsize; j++) { // do a row of U D sum = 0.0; for ( k = 0; k < i; k++) sum += data[i*maxsize+k]*data[k*maxsize+j]; data[i*maxsize+j] = (data[i*maxsize+j]-sum) / data[i*maxsize+i]; } } for ( i = 0; i < actualsize; i++ ) // invert L { for ( j = i; j < actualsize; j++ ) { D x = 1.0; if ( i != j ) { x = 0.0; for ( k = i; k < j; k++ ) x -= data[j*maxsize+k]*data[k*maxsize+i]; } data[j*maxsize+i] = x / data[j*maxsize+j]; } } for ( i = 0; i < actualsize; i++ ) // invert U { for ( j = i; j < actualsize; j++ ) { if ( i == j ) continue; D sum = 0.0; for ( k = i; k < j; k++ ) sum += data[k*maxsize+j]*( (i==k) ? 1.0 : data[i*maxsize+k] ); data[i*maxsize+j] = -sum; } } for ( i = 0; i < actualsize; i++ ) // final inversion { for ( j = 0; j < actualsize; j++ ) { D sum = 0.0; for ( k = ((i>j)?i:j); k < actualsize; k++ ) sum += ((j==k)?1.0:data[j*maxsize+k])*data[k*maxsize+i]; data[j*maxsize+i] = sum; } } }; }; #endif gbm/src/buildinfo.h0000644000176000001440000000110213064145661013762 0ustar ripleyusers// GBM by Greg Ridgeway Copyright (C) 2003 // License: GNU GPL (version 2 or later) #ifndef BUILDINFO_H #define BUILDINFO_H #undef ERROR #include #define GBM_FAILED(hr) ((unsigned long)hr != 0) typedef unsigned long GBMRESULT; #define GBM_OK 0 #define GBM_FAIL 1 #define GBM_INVALIDARG 2 #define GBM_OUTOFMEMORY 3 #define GBM_INVALID_DATA 4 #define GBM_NOTIMPL 5 #define LEVELS_PER_CHUNK ((unsigned long) 1) typedef unsigned long ULONG; typedef char *PCHAR; // #define NOISY_DEBUG #endif // BUILDINFO_H gbm/NAMESPACE0000644000176000001440000000171513064144460012274 0ustar ripleyusers# Export all names that don't start with "." exportPattern("^[^\\.]") useDynLib(gbm) importFrom(survival, Surv) # ns from splines is used in one of the examples importFrom(splines, ns, splineDesign) # xyplot is used, which means several functions internal # to lattice will also be used. Import the lot. 
import(lattice) import(parallel) importFrom("grDevices", "rainbow") importFrom("graphics", "abline", "axis", "barplot", "lines", "mtext", "par", "plot", "polygon", "rug", "segments", "title") importFrom("stats", "approx", "binomial", "delete.response", "gaussian", "glm", "loess", "model.extract", "model.frame", "model.offset", "model.response", "model.weights", "na.pass", "poisson", "predict", "quantile", "rbinom", "reformulate", "rexp", "rnorm", "runif", "sd", "supsmu", "terms", "var", "weighted.mean") S3method(plot, gbm) S3method(predict, gbm) S3method(print, gbm) S3method(summary, gbm) gbm/demo/0000755000176000001440000000000012143232747012000 5ustar ripleyusersgbm/demo/bernoulli.R0000644000176000001440000000641312134211007014104 0ustar ripleyusers# LOGISTIC REGRESSION EXAMPLE cat("Running logistic regression example.\n") # create some data N <- 1000 X1 <- runif(N) X2 <- runif(N) X3 <- factor(sample(letters[1:4],N,replace=T)) mu <- c(-1,0,1,2)[as.numeric(X3)] p <- 1/(1+exp(-(sin(3*X1) - 4*X2 + mu))) Y <- rbinom(N,1,p) # random weights if you want to experiment with them w <- rexp(N) w <- N*w/sum(w) data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3) # fit initial model gbm1 <- gbm(Y~X1+X2+X3, # formula data=data, # dataset weights=w, var.monotone=c(0,0,0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions distribution="bernoulli", n.trees=3000, # number of trees shrinkage=0.001, # shrinkage or learning rate, 0.001 to 0.1 usually work interaction.depth=3, # 1: additive model, 2: two-way interactions, etc bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best train.fraction = 0.5, # fraction of data for training, first train.fraction*N used for training cv.folds=5, # do 5-fold cross-validation n.minobsinnode = 10, # minimum total weight needed in each node verbose = FALSE) # don't print progress # plot the performance best.iter.oob <- gbm.perf(gbm1,method="OOB") # returns out-of-bag estimated best number of trees print(best.iter.oob) best.iter.cv <- gbm.perf(gbm1,method="cv") # returns 5-fold cv estimate of best number of trees print(best.iter.cv) best.iter.test <- gbm.perf(gbm1,method="test") # returns test set estimate of best number of trees print(best.iter.test) best.iter <- best.iter.test # plot variable influence summary(gbm1,n.trees=1) # based on the first tree summary(gbm1,n.trees=best.iter) # based on the estimated best number of trees # create marginal plots # plot variable X1,X2,X3 after "best" iterations par(mfrow=c(1,3)) plot.gbm(gbm1,1,best.iter) plot.gbm(gbm1,2,best.iter) plot.gbm(gbm1,3,best.iter) par(mfrow=c(1,1)) plot.gbm(gbm1,1:2,best.iter) # contour plot of variables 1 and 2 after "best" number iterations plot.gbm(gbm1,2:3,best.iter) # lattice plot of variables 2 and 3 after "best" number iterations # 3-way plot plot.gbm(gbm1,1:3,best.iter) # print the first and last trees print(pretty.gbm.tree(gbm1,1)) print(pretty.gbm.tree(gbm1,gbm1$n.trees)) # make some new data N <- 1000 X1 <- runif(N) X2 <- runif(N) X3 <- factor(sample(letters[1:4],N,replace=T)) mu <- c(-1,0,1,2)[as.numeric(X3)] p <- 1/(1+exp(-(sin(3*X1) - 4*X2 + mu))) Y <- rbinom(N,1,p) data2 <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3) # predict on the new data using "best" number of trees # f.predict will be on the canonical scale (logit,log,etc.) 
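# Note (added): n.trees below is a vector of three iteration counts, so
# predict.gbm returns an N x 3 matrix with one column per count; that is why
# f.predict and p.pred are indexed column-wise ([,1], [,2], [,3]) further down.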
f.predict <- predict.gbm(gbm1,data2, n.trees=c(best.iter.oob,best.iter.cv,best.iter.test)) # transform to probability scale for logistic regression p.pred <- 1/(1+exp(-f.predict)) # calibration plot for logistic regression - well calibrated means a 45 degree line par(mfrow=c(1,1)) calibrate.plot(Y,p.pred[,3]) # logistic error sum(data2$Y*f.predict[,1] - log(1+exp(f.predict[,1]))) sum(data2$Y*f.predict[,2] - log(1+exp(f.predict[,2]))) sum(data2$Y*f.predict[,3] - log(1+exp(f.predict[,3]))) gbm/demo/gaussian.R0000644000176000001440000000762212134235103013731 0ustar ripleyusers# LEAST SQUARES EXAMPLE cat("Running least squares regression example.\n") # create some data N <- 1000 X1 <- runif(N) X2 <- 2*runif(N) X3 <- factor(sample(letters[1:4],N,replace=T)) X4 <- ordered(sample(letters[1:6],N,replace=T)) X5 <- factor(sample(letters[1:3],N,replace=T)) X6 <- 3*runif(N) mu <- c(-1,0,1,2)[as.numeric(X3)] SNR <- 10 # signal-to-noise ratio Y <- X1**1.5 + 2 * (X2**.5) + mu sigma <- sqrt(var(Y)/SNR) Y <- Y + rnorm(N,0,sigma) # create a bunch of missing values X1[sample(1:N,size=100)] <- NA X3[sample(1:N,size=300)] <- NA # random weights if you want to experiment with them # w <- rexp(N) # w <- N*w/sum(w) w <- rep(1,N) data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6) # fit initial model gbm1 <- gbm(Y~X1+X2+X3+X4+X5+X6, # formula data=data, # dataset var.monotone=c(0,0,0,0,0,0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions distribution="gaussian", # bernoulli, adaboost, gaussian, poisson, coxph, or # list(name="quantile",alpha=0.05) for quantile regression n.trees=2000, # number of trees shrinkage=0.005, # shrinkage or learning rate, 0.001 to 0.1 usually work interaction.depth=3, # 1: additive model, 2: two-way interactions, etc bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best train.fraction = 0.5, # fraction of data for training, first train.fraction*N used for training n.minobsinnode = 10, # minimum number of obs needed in each node keep.data=TRUE, cv.folds=10, # do 10-fold cross-validation verbose = FALSE) # don't print progress # plot the performance best.iter <- gbm.perf(gbm1,method="OOB") # returns out-of-bag estimated best number of trees best.iter <- gbm.perf(gbm1,method="test") # returns test set estimate of best number of trees best.iter <- gbm.perf(gbm1,method="cv") # returns cv estimate of best number of trees # plot variable influence summary(gbm1,n.trees=1) # based on the first tree summary(gbm1,n.trees=best.iter) # based on the estimated best number of trees # print the first and last trees print(pretty.gbm.tree(gbm1,1)) print(pretty.gbm.tree(gbm1,gbm1$n.trees)) print(gbm1$c.splits[1:3]) # make some new data N <- 1000 X1 <- runif(N) X2 <- 2*runif(N) X3 <- factor(sample(letters[1:4],N,replace=TRUE)) X4 <- ordered(sample(letters[1:6],N,replace=TRUE)) X5 <- factor(sample(letters[1:3],N,replace=TRUE)) X6 <- 3*runif(N) mu <- c(-1,0,1,2)[as.numeric(X3)] Y <- X1**1.5 + 2 * (X2**.5) + mu Y <- Y + rnorm(N,0,sigma) data2 <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6) print(data2[1:10,]) # predict on the new data using "best" number of trees f.predict <- predict(gbm1,data2,best.iter) # f.predict will be on the canonical scale (logit,log,etc.) 
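# Note (added): for distribution="gaussian" the canonical scale coincides with
# the response scale, so f.predict needs no inverse-link transform and can be
# compared with data2$Y directly, as in the squared-error computation below.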
print(f.predict[1:10]) # least squares error print(sum((data2$Y-f.predict)^2)) # create marginal plots # plot variable X1,X2,X3 after "best" iterations par(mfrow=c(1,3)) plot(gbm1,1,best.iter) plot(gbm1,2,best.iter) plot(gbm1,3,best.iter) par(mfrow=c(1,1)) plot(gbm1,1:2,best.iter) # contour plot of variables 1 and 2 after "best" number iterations plot(gbm1,2:3,best.iter) # lattice plot of variables 2 and 3 after "best" number iterations plot(gbm1,3:4,best.iter) # lattice plot of variables 2 and 3 after "best" number iterations plot(gbm1,c(1,2,6),best.iter,cont=20) # 3-way plots plot(gbm1,1:3,best.iter) plot(gbm1,2:4,best.iter) plot(gbm1,3:5,best.iter) # check interactions interact.gbm(gbm1,data=data,i.var=1:2,n.trees=best.iter) # get all two way interactions i.var <- subset(expand.grid(x1=1:6,x2=1:6), x1=data2$tt[i])*exp(f.predict) ) } cat("Boosting:",sum( data2$delta*( f.predict - log(risk) ) ),"\n") # linear model coxph1 <- coxph(Surv(tt,delta)~X1+X2+X3,data=data) f.predict <- predict(coxph1,newdata=data2) risk <- rep(0,N) for(i in 1:N) { risk[i] <- sum( (data2$tt>=data2$tt[i])*exp(f.predict) ) } cat("Linear model:",sum( data2$delta*( f.predict - log(risk) ) ),"\n") gbm/R/0000755000176000001440000000000012477771576011277 5ustar ripleyusersgbm/R/gbmDoFold.R0000644000176000001440000000213512134211007013220 0ustar ripleyusersgbmDoFold <- # Do specified cross-validation fold - a self-contained function for # passing to individual cores. function(X, i.train, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, cv.group, var.names, response.name, group, s){ library(gbm, quietly=TRUE) cat("CV:", X, "\n") set.seed(s[[X]]) i <- order(cv.group == X) x <- x[i.train,,drop=TRUE][i,,drop=FALSE] y <- y[i.train][i] offset <- offset[i.train][i] nTrain <- length(which(cv.group != X)) group <- group[i.train][i] res <- gbm.fit(x, y, offset=offset, distribution=distribution, w=w, var.monotone=var.monotone, n.trees=n.trees, interaction.depth=interaction.depth, n.minobsinnode=n.minobsinnode, shrinkage=shrinkage, bag.fraction=bag.fraction, nTrain=nTrain, keep.data=FALSE, verbose=FALSE, response.name=response.name, group=group) res } gbm/R/gbmCluster.R0000644000176000001440000000035512134211007013474 0ustar ripleyusersgbmCluster <- function(n){ # If number of cores (n) not given, try to work it out from the number # that appear to be available and the number of CV folds. if (is.null(n)){ n <- detectCores() } makeCluster(n) } gbm/R/gbm.perf.R0000644000176000001440000001351612102666411013100 0ustar ripleyusersgbm.perf <- function(object, plot.it=TRUE, oobag.curve=FALSE, overlay=TRUE, method) { smoother <- NULL if ( missing( method ) ){ if ( object$train.fraction < 1 ){ method <- "test" } else if ( !is.null( object$cv.error ) ){ method <- "cv" } else { method <- "OOB" } cat( paste( "Using", method, "method...\n" ) ) } if((method == "OOB") || oobag.curve) { if(object$bag.fraction==1) stop("Cannot compute OOB estimate or the OOB curve when bag.fraction=1") if(all(!is.finite(object$oobag.improve))) stop("Cannot compute OOB estimate or the OOB curve. 
No finite OOB estimates of improvement") x <- 1:object$n.trees smoother <- loess(object$oobag.improve~x, enp.target=min(max(4,length(x)/10),50)) smoother$y <- smoother$fitted smoother$x <- x best.iter.oob <- x[which.min(-cumsum(smoother$y))] best.iter <- best.iter.oob } if(method == "OOB") { warning("OOB generally underestimates the optimal number of iterations although predictive performance is reasonably competitive. Using cv.folds>0 when calling gbm usually results in improved predictive performance.") } if(method == "test") { best.iter.test <- which.min(object$valid.error) best.iter <- best.iter.test } if(method == "cv") { if(is.null(object$cv.error)) stop("In order to use method=\"cv\" gbm must be called with cv.folds>1.") if(length(object$cv.error) < object$n.trees) warning("cross-validation error is not computed for any additional iterations run using gbm.more().") best.iter.cv <- which.min(object$cv.error) best.iter <- best.iter.cv } if(!is.element(method,c("OOB","test","cv"))) stop("method must be cv, test, or OOB") if(plot.it) { par(mar=c(5,4,4,4)+.1) if (object$distribution$name !="pairwise") { ylab <- switch(substring(object$distribution$name,1,2), ga="Squared error loss", be="Bernoulli deviance", po="Poisson deviance", ad="AdaBoost exponential bound", co="Cox partial deviance", la="Absolute loss", qu="Quantile loss", mu="Multinomial deviance", td="t-distribution deviance" ) } else # object$distribution$name =="pairwise" { ylab <- switch(object$distribution$metric, conc ="Fraction of concordant pairs", ndcg="Normalized discounted cumulative gain", map ="Mean average precision", mrr ="Mean reciprocal rank" ) } if(object$train.fraction==1) { # HS Next line changed to scale axis to include other error # ylim <- range(object$train.error) if ( method=="cv" ){ ylim <- range(object$train.error, object$cv.error) } else if ( method == "test" ){ ylim <- range( object$train.error, object$valid.error) } else { ylim <- range(object$train.error) } } else { ylim <- range(object$train.error,object$valid.error) } plot(object$train.error, ylim=ylim, type="l", xlab="Iteration",ylab=ylab) if(object$train.fraction!=1) { lines(object$valid.error,col="red") } if(method=="cv") { lines(object$cv.error,col="green") } if(!is.na(best.iter)) abline(v=best.iter,col="blue",lwd=2,lty=2) if(oobag.curve) { if(overlay) { par(new=TRUE) plot(smoother$x, cumsum(smoother$y), col="blue", type="l", xlab="",ylab="", axes=FALSE) axis(4,srt=0) at <- mean(range(smoother$y)) mtext(paste("OOB improvement in",ylab),side=4,srt=270,line=2) abline(h=0,col="blue",lwd=2) } plot(object$oobag.improve,type="l", xlab="Iteration", ylab=paste("OOB change in",ylab)) lines(smoother,col="red",lwd=2) abline(h=0,col="blue",lwd=1) abline(v=best.iter,col="blue",lwd=1) } } return(best.iter) } perf.pairwise <- function(y, f, group, metric="ndcg", w=NULL, max.rank=0) { func.name <- switch(metric, conc = "ir.measure.conc", mrr = "ir.measure.mrr", map = "ir.measure.map", ndcg = "ir.measure.ndcg", stop(paste("Metric",metric,"is not supported")) ) # Optimization: for binary targets, # AUC is equivalent but faster than CONC if (metric == "conc" && all(is.element(y, 0:1))) { func.name <- "ir.measure.auc" } # Max rank = 0 means no cut off if (max.rank <= 0) { max.rank <- length(y)+1 } # Random tie breaking in case of duplicate scores. 
# (Without tie breaking, we would overestimate if instances are # sorted descending on target) f <- f + 1E-10 * runif(length(f), min=-0.5, max=0.5) measure.by.group <- as.matrix(by(list(y, f), INDICES=group, FUN=get(func.name), max.rank=max.rank)) # Exclude groups with single result or only negative or positive instances idx <- which((!is.null(measure.by.group)) & measure.by.group >= 0) if (is.null(w)) { return (mean(measure.by.group[idx])) } else { # Assumption: weights are constant per group w.by.group <- tapply(w, group, mean) return (weighted.mean(measure.by.group[idx], w=w.by.group[idx])) } } gbm/R/interact.gbm.R0000644000176000001440000000725312142724707013765 0ustar ripleyusers# Compute Friedman's H statistic for interaction effects interact.gbm <- function(x, data, i.var = 1, n.trees = x$n.trees){ ############################################################### # Do sanity checks on the call if (x$interaction.depth < length(i.var)){ stop("interaction.depth too low in model call") } if (all(is.character(i.var))){ i <- match(i.var, x$var.names) if (any(is.na(i))) { stop("Variables given are not used in gbm model fit: ", i.var[is.na(i)]) } else { i.var <- i } } if ((min(i.var) < 1) || (max(i.var) > length(x$var.names))) { warning("i.var must be between 1 and ", length(x$var.names)) } if (n.trees > x$n.trees) { warning(paste("n.trees exceeds the number of trees in the model, ", x$n.trees,". Using ", x$n.trees, " trees.", sep = "")) n.trees <- x$n.trees } # End of sanity checks ############################################################### unique.tab <- function(z,i.var) { a <- unique(z[,i.var,drop=FALSE]) a$n <- table(factor(apply(z[,i.var,drop=FALSE],1,paste,collapse="\r"), levels=apply(a,1,paste,collapse="\r"))) return(a) } # convert factors for(j in i.var) { if(is.factor(data[,x$var.names[j]])) data[,x$var.names[j]] <- as.numeric(data[,x$var.names[j]])-1 } # generate a list with all combinations of variables a <- apply(expand.grid(rep(list(c(FALSE,TRUE)), length(i.var)))[-1,],1, function(x) as.numeric(which(x))) FF <- vector("list",length(a)) for(j in 1:length(a)) { FF[[j]]$Z <- data.frame(unique.tab(data, x$var.names[i.var[a[[j]]]])) FF[[j]]$n <- as.numeric(FF[[j]]$Z$n) FF[[j]]$Z$n <- NULL FF[[j]]$f <- .Call("gbm_plot", X = as.double(data.matrix(FF[[j]]$Z)), cRows = as.integer(nrow(FF[[j]]$Z)), cCols = as.integer(ncol(FF[[j]]$Z)), n.class = as.integer(x$num.classes), i.var = as.integer(i.var[a[[j]]] - 1), n.trees = as.integer(n.trees), initF = as.double(x$initF), trees = x$trees, c.splits = x$c.splits, var.type = as.integer(x$var.type), PACKAGE = "gbm") # FF[[jj]]$Z is the data, f is the predictions, n is the number of levels for factors # Need to restructure f to deal with multinomial case FF[[j]]$f <- matrix(FF[[j]]$f, ncol=x$num.classes, byrow=FALSE) # center the values FF[[j]]$f <- apply(FF[[j]]$f, 2, function(x, w){ x - weighted.mean(x, w, na.rm=TRUE) }, w=FF[[j]]$n) # precompute the sign of these terms to appear in H FF[[j]]$sign <- ifelse(length(a[[j]]) %% 2 == length(i.var) %% 2, 1, -1) } H <- FF[[length(a)]]$f for(j in 1:(length(a)-1)){ i1 <- apply(FF[[length(a)]]$Z[,a[[j]], drop=FALSE], 1, paste, collapse="\r") i2 <- apply(FF[[j]]$Z,1,paste,collapse="\r") i <- match(i1, i2) H <- H + with(FF[[j]], sign*f[i,]) } # Compute H w <- matrix(FF[[length(a)]]$n, ncol=1) f <- matrix(FF[[length(a)]]$f^2, ncol=x$num.classes, byrow=FALSE) top <- apply(H^2, 2, weighted.mean, w = w, na.rm = TRUE) btm <- apply(f, 2, weighted.mean, w = w, na.rm = TRUE) H <- top / btm if 
(x$distribution$name=="multinomial"){ names(H) <- x$classes } # If H > 1, rounding and tiny main effects have messed things up H[H > 1] <- NaN return(sqrt(H)) } gbm/R/getStratify.R0000644000176000001440000000063212102666411013700 0ustar ripleyusersgetStratify <- function(strat, d){ if (is.null(strat)){ if (d$name == "multinomial" ){ strat <- TRUE } else { strat <- FALSE } } else { if (!is.element(d$name, c( "bernoulli", "multinomial"))){ warning("You can only use class.stratify.cv when distribution is bernoulli or multinomial. Ignored.") strat <- FALSE } } # Close else strat } gbm/R/shrink.gbm.R0000644000176000001440000000265712102666411013446 0ustar ripleyusers# evaluates the objective function and gradient with respect to beta # beta = log(lambda/(1-lambda)) shrink.gbm <- function(object,n.trees, lambda=rep(10,length(object$var.names)), ...) { if(length(lambda) != length(object$var.names)) { stop("lambda must have the same length as the number of variables in the gbm object.") } if(is.null(object$data)) { stop("shrink.gbm requires keep.data=TRUE when gbm model is fit.") } y <- object$data$y x <- object$data$x cCols <- length(object$var.names) cRows <- length(x)/cCols if(missing(n.trees) || (n.trees > object$n.trees)) { n.trees <- object$n.trees warning("n.trees not specified or some values exceeded number fit so far. Using ",n.trees,".") } result <- .Call("gbm_shrink_gradient", y=as.double(y), X=as.double(x), cRows=as.integer(cRows), cCols=as.integer(cCols), n.trees=as.integer(n.trees), initF=object$initF, trees=object$trees, c.split=object$c.split, var.type=as.integer(object$var.type), depth=as.integer(object$interaction.depth), lambda=as.double(lambda), PACKAGE = "gbm") names(result) <- c("predF","objective","gradient") return(result) } gbm/R/calibrate.plot.R0000644000176000001440000000457612102666411014311 0ustar ripleyusersquantile.rug <- function(x,prob=(0:10)/10,...) { quants <- quantile(x[!is.na(x)],prob=prob) if(length(unique(quants)) < length(prob)) { quants <- jitter(quants) } rug(quants,...) } calibrate.plot <- function(y,p, distribution="bernoulli", replace=TRUE, line.par=list(col="black"), shade.col="lightyellow", shade.density=NULL, rug.par=list(side=1), xlab="Predicted value", ylab="Observed average", xlim=NULL,ylim=NULL, knots=NULL,df=6, ...) { data <- data.frame(y=y,p=p) if(is.null(knots) && is.null(df)) stop("Either knots or df must be specified") if((df != round(df)) || (df<1)) stop("df must be a positive integer") if(distribution=="bernoulli") { family1 = binomial } else if(distribution=="poisson") { family1 = poisson } else { family1 = gaussian } gam1 <- glm(y~ns(p,df=df,knots=knots),data=data,family=family1) x <- seq(min(p),max(p),length=200) yy <- predict(gam1,newdata=data.frame(p=x),se.fit=TRUE,type="response") x <- x[!is.na(yy$fit)] yy$se.fit <- yy$se.fit[!is.na(yy$fit)] yy$fit <- yy$fit[!is.na(yy$fit)] if(!is.na(shade.col)) { se.lower <- yy$fit-2*yy$se.fit se.upper <- yy$fit+2*yy$se.fit if(distribution=="bernoulli") { se.lower[se.lower < 0] <- 0 se.upper[se.upper > 1] <- 1 } if(distribution=="poisson") { se.lower[se.lower < 0] <- 0 } if(is.null(xlim)) xlim <- range(se.lower,se.upper,x) if(is.null(ylim)) ylim <- range(se.lower,se.upper,x) } else { if(is.null(xlim)) xlim <- range(yy$fit,x) if(is.null(ylim)) ylim <- range(yy$fit,x) } if(replace) { plot(0,0, type="n", xlab=xlab,ylab=ylab, xlim=xlim,ylim=ylim, ...) 
} if(!is.na(shade.col)) { polygon(c(x,rev(x),x[1]), c(se.lower,rev(se.upper),se.lower[1]), col=shade.col, border=NA, density=shade.density) } lines(x,yy$fit,col=line.par$col) quantile.rug(p,side=rug.par$side) abline(0,1,col="red") } gbm/R/getCVgroup.R0000644000176000001440000000202212102666411013453 0ustar ripleyusersgetCVgroup <- # Construct cross-validation groups depending on the type of model to be fit function(distribution, class.stratify.cv, y, i.train, cv.folds, group){ if (distribution$name %in% c( "bernoulli", "multinomial" ) & class.stratify.cv ){ nc <- table(y[i.train]) # Number in each class uc <- names(nc) if (min(nc) < cv.folds){ stop( paste("The smallest class has only", min(nc), "objects in the training set. Can't do", cv.folds, "fold cross-validation.")) } cv.group <- vector(length = length(i.train)) for (i in 1:length(uc)){ cv.group[y[i.train] == uc[i]] <- sample(rep(1:cv.folds , length = nc[i])) } } # Close if else if (distribution$name == "pairwise") { # Split into CV folds at group boundaries s <- sample(rep(1:cv.folds, length=nlevels(group))) cv.group <- s[as.integer(group[i.train])] } else { cv.group <- sample(rep(1:cv.folds, length=length(i.train))) } cv.group } gbm/R/gbm.loss.R0000644000176000001440000000227012102666411013117 0ustar ripleyusersgbm.loss <- function(y, f, w, offset, dist, baseline, group=NULL, max.rank=NULL) { if (!is.na(offset)) { f <- offset+f } if (dist$name != "pairwise") { switch(dist$name, gaussian = weighted.mean((y - f)^2,w) - baseline, bernoulli = -2*weighted.mean(y*f - log(1+exp(f)),w) - baseline, laplace = weighted.mean(abs(y-f),w) - baseline, adaboost = weighted.mean(exp(-(2*y-1)*f),w) - baseline, poisson = -2*weighted.mean(y*f-exp(f),w) - baseline, stop(paste("Distribution",dist$name,"is not yet supported for method=permutation.test.gbm"))) } else # dist$name == "pairwise" { if (is.null(dist$metric)) { stop("No metric specified for distribution 'pairwise'") } if (!is.element(dist$metric, c("conc", "ndcg", "map", "mrr"))) { stop("Invalid metric '", dist$metric, "' specified for distribution 'pairwise'") } if (is.null(group)) { stop("For distribution 'pairwise', parameter 'group' has to be supplied") } # Loss = 1 - utility (1 - perf.pairwise(y, f, group, dist$metric, w, max.rank)) - baseline } } gbm/R/pretty.gbm.tree.R0000644000176000001440000000075012102666411014425 0ustar ripleyuserspretty.gbm.tree <- function(object,i.tree=1) { if((i.tree<1) || (i.tree>length(object$trees))) { stop("i.tree is out of range. 
Must be less than ",length(object$trees)) } else { temp <- data.frame(object$trees[[i.tree]]) names(temp) <- c("SplitVar","SplitCodePred","LeftNode", "RightNode","MissingNode","ErrorReduction", "Weight","Prediction") row.names(temp) <- 0:(nrow(temp)-1) } return(temp) } gbm/R/test.gbm.R0000644000176000001440000002423012477771576013146 0ustar ripleyuserstest.gbm <- function(){ # Based on example in R package # Gaussian example ############################################################################ ## test Gaussian distribution gbm model set.seed(1) cat("Running least squares regression example.\n") # create some data N <- 1000 X1 <- runif(N) X2 <- 2*runif(N) X3 <- factor(sample(letters[1:4],N,replace=T)) X4 <- ordered(sample(letters[1:6],N,replace=T)) X5 <- factor(sample(letters[1:3],N,replace=T)) X6 <- 3*runif(N) mu <- c(-1,0,1,2)[as.numeric(X3)] SNR <- 10 # signal-to-noise ratio Y <- X1**1.5 + 2 * (X2**.5) + mu sigma <- sqrt(var(Y)/SNR) Y <- Y + rnorm(N,0,sigma) # create a bunch of missing values X1[sample(1:N,size=100)] <- NA X3[sample(1:N,size=300)] <- NA w <- rep(1,N) data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6) # fit initial model gbm1 <- gbm(Y~X1+X2+X3+X4+X5+X6, # formula data=data, # dataset var.monotone=c(0,0,0,0,0,0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions distribution="gaussian", # bernoulli, adaboost, gaussian, poisson, coxph, or # list(name="quantile",alpha=0.05) for quantile regression n.trees=2000, # number of trees shrinkage=0.005, # shrinkage or learning rate, 0.001 to 0.1 usually work interaction.depth=3, # 1: additive model, 2: two-way interactions, etc bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best train.fraction = 0.5, # fraction of data for training, first train.fraction*N used for training n.minobsinnode = 10, # minimum number of obs needed in each node keep.data=TRUE, cv.folds=10) # do 10-fold cross-validation # Get best model best.iter <- gbm.perf(gbm1,method="cv", plot.it=FALSE) # returns cv estimate of best number of trees set.seed(2) # make some new data N <- 1000 X1 <- runif(N) X2 <- 2*runif(N) X3 <- factor(sample(letters[1:4],N,replace=TRUE)) X4 <- ordered(sample(letters[1:6],N,replace=TRUE)) X5 <- factor(sample(letters[1:3],N,replace=TRUE)) X6 <- 3*runif(N) mu <- c(-1,0,1,2)[as.numeric(X3)] # Actual underlying signal Y <- X1**1.5 + 2 * (X2**.5) + mu # Want to see how close predictions are to the underlying signal; noise would just interfere with this # Y <- Y + rnorm(N,0,sigma) data2 <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6) # predict on the new data using "best" number of trees f.predict <- predict(gbm1,data2,best.iter) # f.predict will be on the canonical scale (logit,log,etc.) 
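   # Illustrative summary, not part of the original test: with
   # distribution="gaussian" the link is the identity, so f.predict can be
   # compared to the noise-free response directly.
   cat("RMSE against the underlying signal:",
       sqrt(mean((data2$Y - f.predict)^2)), "\n")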
# Base the validation tests on observed discrepancies
RUnit::checkTrue(abs(mean(data2$Y-f.predict)) < 0.01,
                 msg="Gaussian absolute error within tolerance")
RUnit::checkTrue(sd(data2$Y-f.predict) < sigma,
                 msg="Gaussian squared error within tolerance")

############################################################################
## test coxph distribution gbm model
## COX PROPORTIONAL HAZARDS REGRESSION EXAMPLE

cat("Running cox proportional hazards regression example.\n")
# create some data
set.seed(1)
N <- 3000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]

f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10)
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)
delta <- as.numeric(tt.surv <= tt.cens)
tt <- apply(cbind(tt.surv,tt.cens),1,min)

# throw in some missing values
X1[sample(1:N,size=100)] <- NA
X3[sample(1:N,size=300)] <- NA

# random weights if you want to experiment with them
w <- rep(1,N)

data <- data.frame(tt=tt,delta=delta,X1=X1,X2=X2,X3=X3)

# fit initial model
gbm1 <- gbm(Surv(tt,delta)~X1+X2+X3,  # formula
            data=data,                # dataset
            weights=w,
            var.monotone=c(0,0,0),    # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions
            distribution="coxph",
            n.trees=3000,             # number of trees
            shrinkage=0.001,          # shrinkage or learning rate, 0.001 to 0.1 usually work
            interaction.depth=3,      # 1: additive model, 2: two-way interactions, etc
            bag.fraction = 0.5,       # subsampling fraction, 0.5 is probably best
            train.fraction = 0.5,     # fraction of data for training, first train.fraction*N used for training
            cv.folds = 5,             # do 5-fold cross-validation
            n.minobsinnode = 10,      # minimum total weight needed in each node
            keep.data = TRUE)

best.iter <- gbm.perf(gbm1,method="test", plot.it=FALSE) # returns test set estimate of best number of trees

# make some new data
set.seed(2)
N <- 1000
X1 <- runif(N)
X2 <- runif(N)
X3 <- factor(sample(letters[1:4],N,replace=T))
mu <- c(-1,0,1,2)[as.numeric(X3)]

f <- 0.5*sin(3*X1 + 5*X2^2 + mu/10) # -0.5 <= f <= 0.5 via sin fn.
tt.surv <- rexp(N,exp(f))
tt.cens <- rexp(N,0.5)

data2 <- data.frame(tt=apply(cbind(tt.surv,tt.cens),1,min),
                    delta=as.numeric(tt.surv <= tt.cens),
                    f=f,
                    X1=X1,X2=X2,X3=X3)

# predict on the new data using "best" number of trees
# f.predict will be on the canonical scale (logit,log,etc.)
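   # (Aside, illustrative and not from the original test: for
   # distribution="coxph" the canonical scale is the log relative hazard,
   # so exp() of the predictions computed below can be read as hazard
   # ratios relative to the baseline, e.g. hazard.ratio <- exp(f.predict).)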
f.predict <- predict(gbm1,data2,best.iter) #plot(data2$f,f.predict) # Use observed sd RUnit::checkTrue(sd(data2$f - f.predict) < 0.4, msg="Coxph: squared error within tolerance") ############################################################################ ## Test bernoulli distribution gbm model set.seed(1) cat("Running logistic regression example.\n") # create some data N <- 1000 X1 <- runif(N) X2 <- runif(N) X3 <- factor(sample(letters[1:4],N,replace=T)) mu <- c(-1,0,1,2)[as.numeric(X3)] p <- 1/(1+exp(-(sin(3*X1) - 4*X2 + mu))) Y <- rbinom(N,1,p) # random weights if you want to experiment with them w <- rexp(N) w <- N*w/sum(w) data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3) # fit initial model gbm1 <- gbm(Y~X1+X2+X3, # formula data=data, # dataset weights=w, var.monotone=c(0,0,0), # -1: monotone decrease, +1: monotone increase, 0: no monotone restrictions distribution="bernoulli", n.trees=3000, # number of trees shrinkage=0.001, # shrinkage or learning rate, 0.001 to 0.1 usually work interaction.depth=3, # 1: additive model, 2: two-way interactions, etc bag.fraction = 0.5, # subsampling fraction, 0.5 is probably best train.fraction = 0.5, # fraction of data for training, first train.fraction*N used for training cv.folds=5, # do 5-fold cross-validation n.minobsinnode = 10) # minimum total weight needed in each node best.iter.test <- gbm.perf(gbm1,method="test", plot.it=FALSE) # returns test set estimate of best number of trees best.iter <- best.iter.test # make some new data set.seed(2) N <- 1000 X1 <- runif(N) X2 <- runif(N) X3 <- factor(sample(letters[1:4],N,replace=T)) mu <- c(-1,0,1,2)[as.numeric(X3)] p <- 1/(1+exp(-(sin(3*X1) - 4*X2 + mu))) Y <- rbinom(N,1,p) data2 <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3) # predict on the new data using "best" number of trees # f.predict will be on the canonical scale (logit,log,etc.) 
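   # (Aside, illustrative and not from the original test: for
   # distribution="bernoulli" the canonical scale is the log odds, so
   # probabilities can be recovered with 1/(1+exp(-f.1.predict)), which is
   # what predict.gbm(..., type="response") computes.)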
f.1.predict <- predict.gbm(gbm1,data2, n.trees=best.iter.test) # compute quantity prior to transformation f.new = sin(3*X1) - 4*X2 + mu # Base the validation tests on observed discrepancies RUnit::checkTrue(sd(f.new - f.1.predict) < 1.0 ) invisible() } ################################################################################ ########################### test.relative.influence() ########################## ########################### ########################## test.relative.influence <- function(){ # Test that relative.influence really does pick out the true predictors set.seed(1234) X1 <- matrix(nrow=1000, ncol=50) X1 <- apply(X1, 2, function(x) rnorm(1000)) # Random noise X2 <- matrix(nrow=1000, ncol=5) X2 <- apply(X2, 2, function(x) c(rnorm(500), rnorm(500, 3))) # Real predictors cls <- rep(c(0, 1), ea=500) # Class X <- data.frame(cbind(X1, X2, cls)) mod <- gbm(cls ~ ., data= X, n.trees=1000, cv.folds=5, shrinkage=.01, interaction.depth=2) ri <- rev(sort(relative.influence(mod))) wh <- names(ri)[1:5] res <- sum(wh %in% paste("V", 51:55, sep = "")) RUnit::checkEqualsNumeric(res, 5, msg="Testing relative.influence identifies true predictors") } ################################################################################ ################################ validate.gbm() ################################ ################################ ################################ validate.gbm <- function () { if(!requireNamespace("RUnit", quietly = TRUE)) stop("You need to install the RUnit package to validate gbm") wh <- (1:length(search()))[search() == "package:gbm"] tests <- objects(wh)[substring(objects(wh), 1, 5) == "test."] # Create temporary directory to put tests into if (.Platform$OS.type == "windows"){ sep <- "\\" } else { sep <- "/" } dir <- file.path(tempdir(), "gbm.tests", fsep = sep) dir.create(dir) for (i in 1:length(tests)) { str <- paste(dir, sep, tests[i], ".R", sep = "") dump(tests[i], file = str) } res <- RUnit::defineTestSuite("gbm", dirs = dir, testFuncRegexp = "^test.+", testFileRegexp = "*.R") cat("Running gbm test suite.\nThis will take some time...\n\n") res <- RUnit::runTestSuite(res) res } gbm/R/print.gbm.R0000644000176000001440000000505712131267445013307 0ustar ripleyusers# print, show and summary functions for gbm print.gbm <- function(x, ... 
) { if (!is.null(x$call)){ print(x$call) } dist.name <- x$distribution$name if (dist.name == "pairwise") { if (!is.null(x$distribution$max.rank) && x$distribution$max.rank > 0) { dist.name <- sprintf("pairwise (metric=%s, max.rank=%d)", x$distribution$metric, x$distribution$max.rank) } else { dist.name <- sprintf("pairwise (metric=%s)", x$distribution$metric) } } cat( paste( "A gradient boosted model with", dist.name, "loss function.\n" )) cat( paste( length( x$train.error ), "iterations were performed.\n" ) ) best <- length( x$train.error ) if ( !is.null( x$cv.error ) ) { best <- gbm.perf( x, plot.it = FALSE, method="cv" ) cat( paste("The best cross-validation iteration was ", best, ".\n", sep = "" ) ) } if ( x$train.fraction < 1 ) { best <- gbm.perf( x, plot.it = FALSE, method="test" ) cat( paste("The best test-set iteration was ", best, ".\n", sep = "" ) ) } if ( is.null( best ) ) { best <- length( x$train.error ) } ri <- relative.influence( x, n.trees=best ) cat( "There were", length( x$var.names ), "predictors of which", sum( ri > 0 ), "had non-zero influence.\n" ) invisible() } show.gbm <- print.gbm summary.gbm <- function(object, cBars=length(object$var.names), n.trees=object$n.trees, plotit=TRUE, order=TRUE, method=relative.influence, normalize=TRUE, ...) { if(n.trees < 1) { stop("n.trees must be greater than 0.") } if(n.trees > object$n.trees) { warning("Exceeded total number of GBM terms. Results use n.trees=",object$n.trees," terms.\n") n.trees <- object$n.trees } rel.inf <- method(object,n.trees) rel.inf[rel.inf<0] <- 0 if(order) { i <- order(-rel.inf) } else { i <- 1:length(rel.inf) } if(cBars==0) cBars <- min(10,length(object$var.names)) if(cBars>length(object$var.names)) cBars <- length(object$var.names) if(normalize) rel.inf <- 100*rel.inf/sum(rel.inf) if(plotit) { barplot(rel.inf[i[cBars:1]], horiz=TRUE, col=rainbow(cBars,start=3/6,end=4/6), names=object$var.names[i[cBars:1]], xlab="Relative influence",...) } return(data.frame(var=object$var.names[i], rel.inf=rel.inf[i])) } gbm/R/plot.gbm.R0000644000176000001440000003242112131264765013126 0ustar ripleyusersplot.gbm <- function(x, i.var=1, n.trees=x$n.trees, continuous.resolution=100, return.grid=FALSE, type="link", ...) { if (!is.element(type, c("link", "response"))){ stop( "type must be either 'link' or 'response'") } if(all(is.character(i.var))) { i <- match(i.var,x$var.names) if(any(is.na(i))) { stop("Plot variables not used in gbm model fit: ",i.var[is.na(i)]) } else { i.var <- i } } if((min(i.var)<1) || (max(i.var)>length(x$var.names))) { warning("i.var must be between 1 and ",length(x$var.names)) } if(n.trees > x$n.trees) { warning(paste("n.trees exceeds the number of trees in the model, ",x$n.trees, ". 
Plotting using ",x$n.trees," trees.",sep="")) n.trees <- x$n.trees } if(length(i.var) > 3) { warning("gbm.int.plot creates up to 3-way interaction plots.\nplot.gbm will only return the plotting data structure.") return.grid = TRUE } # generate grid to evaluate gbm model grid.levels <- vector("list",length(i.var)) for(i in 1:length(i.var)) { # continuous if(is.numeric(x$var.levels[[i.var[i]]])) { grid.levels[[i]] <- seq(min(x$var.levels[[i.var[i]]]), max(x$var.levels[[i.var[i]]]), length=continuous.resolution) } # categorical or ordered else { grid.levels[[i]] <- as.numeric(factor(x$var.levels[[i.var[i]]], levels=x$var.levels[[i.var[i]]]))-1 } } X <- expand.grid(grid.levels) names(X) <- paste("X",1:length(i.var),sep="") # Next if block for compatibility with objects created with 1.6 if (is.null(x$num.classes)){ x$num.classes <- 1 } # evaluate at each data point y <- .Call("gbm_plot", X = as.double(data.matrix(X)), cRows = as.integer(nrow(X)), cCols = as.integer(ncol(X)), n.class = as.integer(x$num.classes), i.var = as.integer(i.var-1), n.trees = as.integer(n.trees) , initF = as.double(x$initF), trees = x$trees, c.splits = x$c.splits, var.type = as.integer(x$var.type), PACKAGE = "gbm") if (x$distribution$name=="multinomial") { ## Put result into matrix form X$y <- matrix(y, ncol = x$num.classes) colnames(X$y) <- x$classes ## Use class probabilities if (type=="response"){ X$y <- exp(X$y) X$y <- X$y / matrix(rowSums(X$y), ncol=ncol(X$y), nrow=nrow(X$y)) } } else if(is.element(x$distribution$name, c("bernoulli", "pairwise")) && type=="response") { X$y <- 1/(1+exp(-y)) } else if ((x$distribution$name=="poisson") && (type=="response")){ X$y <- exp(y) } else if (type=="response"){ warning("type 'response' only implemented for 'bernoulli', 'poisson', 'multinomial', and 'pairwise'. Ignoring" ) } else { X$y <- y } # transform categorical variables back to factors f.factor <- rep(FALSE,length(i.var)) for(i in 1:length(i.var)) { if(!is.numeric(x$var.levels[[i.var[i]]])) { X[,i] <- factor(x$var.levels[[i.var[i]]][X[,i]+1], levels=x$var.levels[[i.var[i]]]) f.factor[i] <- TRUE } } if(return.grid) { names(X)[1:length(i.var)] <- x$var.names[i.var] return(X) } # create the plots if(length(i.var)==1) { if(!f.factor) { j <- order(X$X1) if (x$distribution$name == "multinomial") { if ( type == "response" ){ ylabel <- "Predicted class probability" } else { ylabel <- paste("f(",x$var.names[i.var],")",sep="") } plot(range(X$X1), range(X$y), type = "n", xlab = x$var.names[i.var], ylab = ylabel) for (ii in 1:x$num.classes){ lines(X$X1,X$y[,ii], xlab=x$var.names[i.var], ylab=paste("f(",x$var.names[i.var],")",sep=""), col = ii, ...) } } else if (is.element(x$distribution$name, c("bernoulli", "pairwise"))) { if ( type == "response" ){ ylabel <- "Predicted probability" } else { ylabel <- paste("f(",x$var.names[i.var],")",sep="") } plot( X$X1, X$y , type = "l", xlab = x$var.names[i.var], ylab=ylabel ) } else if ( x$distribution$name == "poisson" ){ if (type == "response" ){ ylabel <- "Predicted count" } else{ ylabel <- paste("f(",x$var.names[i.var],")",sep="") } plot( X$X1, X$y , type = "l", xlab = x$var.names[i.var], ylab=ylabel ) } else { plot(X$X1,X$y, type="l", xlab=x$var.names[i.var], ylab=paste("f(",x$var.names[i.var],")",sep=""),...) 
} } else { if (x$distribution$name == "multinomial") { nX <- length(X$X1) dim.y <- dim(X$y) if (type == "response" ){ ylabel <- "Predicted probability" } else{ ylabel <- paste("f(",x$var.names[i.var],")",sep="") } plot(c(0,nX), range(X$y), axes = FALSE, type = "n", xlab = x$var.names[i.var], ylab = ylabel) axis(side = 1, labels = FALSE, at = 0:nX) axis(side = 2) mtext(as.character(X$X1), side = 1, at = 1:nX - 0.5) segments(x1 = rep(1:nX - 0.75, each = dim.y[2]), y1 = as.vector(t(X$y)), x2 = rep(1:nX - 0.25, each = dim.y[2]), col = 1:dim.y[2]) } else if (is.element(x$distribution$name, c("bernoulli", "pairwise")) && type == "response" ){ ylabel <- "Predicted probability" plot( X$X1, X$y, type = "l", xlab=x$var.names[i.var], ylab=ylabel ) } else if ( x$distribution$name == "poisson" & type == "response" ){ ylabel <- "Predicted count" plot( X$X1, X$y, type = "l", xlab=x$var.names[i.var], ylab=ylabel ) } else { plot(X$X1,X$y, type="l", xlab=x$var.names[i.var], ylab=paste("f(",x$var.names[i.var],")",sep=""),...) } } } else if(length(i.var)==2) { if(!f.factor[1] && !f.factor[2]) { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X$temp <- X$y[, ii] print(levelplot(temp~X1*X2,data=X, xlab=x$var.names[i.var[1]], ylab=x$var.names[i.var[2]],...)) title(paste("Class:", dimnames(X$y)[[2]][ii])) } X$temp <- NULL } else { print(levelplot(y~X1*X2,data=X, xlab=x$var.names[i.var[1]], ylab=x$var.names[i.var[2]],...)) } } else if(f.factor[1] && !f.factor[2]) { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X$temp <- X$y[, ii] print( xyplot(temp~X2|X1,data=X, xlab=x$var.names[i.var[2]], ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""), type="l", panel = panel.xyplot, ...) ) title(paste("Class:", dimnames(X$y)[[2]][ii])) } X$temp <- NULL } else { print(xyplot(y~X2|X1,data=X, xlab=x$var.names[i.var[2]], ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""), type="l", panel = panel.xyplot, ...)) } } else if(!f.factor[1] && f.factor[2]) { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X$temp <- X$y[, ii] print( xyplot(temp~X1|X2,data=X, xlab=x$var.names[i.var[1]], ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""), type="l", panel = panel.xyplot, ...) ) title(paste("Class:", dimnames(X$y)[[2]][ii])) } X$temp <- NULL } else { print(xyplot(y~X1|X2,data=X, xlab=x$var.names[i.var[1]], ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""), type="l", panel = panel.xyplot, ...)) } } else { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X$temp <- X$y[, ii] print( stripplot(X1~temp|X2,data=X, xlab=x$var.names[i.var[2]], ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""), ...) ) title(paste("Class:", dimnames(X$y)[[2]][ii])) } X$temp <- NULL } else { print(stripplot(X1~y|X2,data=X, xlab=x$var.names[i.var[2]], ylab=paste("f(",x$var.names[i.var[1]],",",x$var.names[i.var[2]],")",sep=""), ...)) } } } else if(length(i.var)==3) { i <- order(f.factor) X.new <- X[,i] X.new$y <- X$y names(X.new) <- names(X) # 0 factor, 3 continuous if(sum(f.factor)==0) { X.new$X3 <- equal.count(X.new$X3) if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X.new$temp <- X.new$y[, ii] print( levelplot(temp~X1*X2|X3,data=X.new, xlab=x$var.names[i.var[i[1]]], ylab=x$var.names[i.var[i[2]]],...) 
) title(paste("Class:", dimnames(X.new$y)[[2]][ii])) } X.new$temp <- NULL } else { print(levelplot(y~X1*X2|X3,data=X.new, xlab=x$var.names[i.var[i[1]]], ylab=x$var.names[i.var[i[2]]],...)) } } # 1 factor, 2 continuous else if(sum(f.factor)==1) { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X.new$temp <- X.new$y[, ii] print( levelplot(temp~X1*X2|X3,data=X.new, xlab=x$var.names[i.var[i[1]]], ylab=x$var.names[i.var[i[2]]],...)) title(paste("Class:", dimnames(X.new$y)[[2]][ii]) ) } X.new$temp <- NULL } else { print(levelplot(y~X1*X2|X3,data=X.new, xlab=x$var.names[i.var[i[1]]], ylab=x$var.names[i.var[i[2]]],...)) } } # 2 factors, 1 continuous else if(sum(f.factor)==2) { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X.new$temp <- X.new$y[, ii] print( xyplot(temp~X1|X2*X3,data=X.new, type="l", xlab=x$var.names[i.var[i[1]]], ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""), panel = panel.xyplot, ...) ) title(paste("Class:", dimnames(X.new$y)[[2]][ii]) ) } X.new$temp <- NULL } else { print(xyplot(y~X1|X2*X3,data=X.new, type="l", xlab=x$var.names[i.var[i[1]]], ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""), panel = panel.xyplot, ...)) } } # 3 factors, 0 continuous else if(sum(f.factor)==3) { if (x$distribution$name == "multinomial") { for (ii in 1:x$num.classes){ X.new$temp <- X.new$y[, ii] print( stripplot(X1~temp|X2*X3,data=X.new, xlab=x$var.names[i.var[i[1]]], ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""), ...) ) title(paste("Class:", dimnames(X.new$y)[[2]][ii]) ) } X.new$temp <- NULL } else { print(stripplot(X1~y|X2*X3,data=X.new, xlab=x$var.names[i.var[i[1]]], ylab=paste("f(",paste(x$var.names[i.var[1:3]],collapse=","),")",sep=""), ...)) } } } } gbm/R/predict.gbm.R0000644000176000001440000000740012142655162013576 0ustar ripleyuserspredict.gbm <- function(object,newdata,n.trees, type="link", single.tree = FALSE, ...) { if ( missing( newdata ) ){ newdata <- reconstructGBMdata(object) } if ( missing(n.trees) ) { if ( object$train.fraction < 1 ){ n.trees <- gbm.perf( object, method="test", plot.it = FALSE ) } else if (!is.null(object$cv.error)){ n.trees <- gbm.perf( object, method="cv", plot.it = FALSE ) } else{ best <- length( object$train.error ) } cat( paste( "Using", n.trees, "trees...\n" ) ) } if(!is.element(type, c("link","response" ))) { stop("type must be either 'link' or 'response'") } if(!is.null(object$Terms)) { x <- model.frame(terms(reformulate(object$var.names)), newdata, na.action=na.pass) } else { x <- newdata } cRows <- nrow(x) cCols <- ncol(x) for(i in 1:cCols) { if(is.factor(x[,i])) { if (length(levels(x[,i])) > length(object$var.levels[[i]])) { new.compare <- levels(x[,i])[1:length(object$var.levels[[i]])] } else { new.compare <- levels(x[,i]) } if (!identical(object$var.levels[[i]], new.compare)) { x[,i] <- factor(x[,i], union(object$var.levels[[i]], levels(x[,i]))) } x[,i] <- as.numeric(x[,i])-1 } } x <- as.vector(unlist(x, use.names=FALSE)) if(missing(n.trees) || any(n.trees > object$n.trees)) { n.trees[n.trees>object$n.trees] <- object$n.trees warning("Number of trees not specified or exceeded number fit so far. Using ",paste(n.trees,collapse=" "),".") } i.ntree.order <- order(n.trees) # Next if block for compatibility with objects created with version 1.6. 
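   # (Usage note, illustrative: n.trees may be a vector, e.g.
   #  predict(gbm1, newdata, n.trees=c(100,500,1000)); the code below
   #  evaluates the requested iterations in increasing order and returns
   #  one column of predictions per element of n.trees.)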
if (is.null(object$num.classes)){ object$num.classes <- 1 } predF <- .Call("gbm_pred", X=as.double(x), cRows=as.integer(cRows), cCols=as.integer(cCols), cNumClasses = as.integer(object$num.classes), n.trees=as.integer(n.trees[i.ntree.order]), initF=object$initF, trees=object$trees, c.split=object$c.split, var.type=as.integer(object$var.type), single.tree = as.integer(single.tree), PACKAGE = "gbm") if((length(n.trees) > 1) || (object$num.classes > 1)) { if(object$distribution$name=="multinomial") { predF <- array(predF, dim=c(cRows,object$num.classes,length(n.trees))) dimnames(predF) <- list(NULL, object$classes, n.trees) predF[,,i.ntree.order] <- predF } else { predF <- matrix(predF, ncol=length(n.trees), byrow=FALSE) colnames(predF) <- n.trees predF[,i.ntree.order] <- predF } } if(type=="response") { if(is.element(object$distribution$name, c("bernoulli", "pairwise"))) { predF <- 1/(1+exp(-predF)) } else if(object$distribution$name=="poisson") { predF <- exp(predF) } else if (object$distribution$name == "adaboost"){ predF <- 1 / (1 + exp(-2*predF)) } if(object$distribution$name=="multinomial") { pexp <- exp(predF) psum <- apply(pexp, c(1, 3), function(x) { x / sum(x) }) # Transpose each 2d array predF <- aperm(psum, c(2, 1, 3)) } if((length(n.trees)==1) && (object$distribution$name!="multinomial")) { predF <- as.vector(predF) } } if(!is.null(attr(object$Terms,"offset"))) { warning("predict.gbm does not add the offset to the predicted values.") } return(predF) } gbm/R/gbm.R0000644000176000001440000001350512134211007012133 0ustar ripleyusers.onAttach <- function(lib, pkg) { vers <- library(help=gbm)$info[[1]] vers <- vers[grep("Version:",vers)] vers <- rev(strsplit(vers," ")[[1]])[1] packageStartupMessage(paste("Loaded gbm",vers)) } gbm <- function(formula = formula(data), distribution = "bernoulli", data = list(), weights, var.monotone = NULL, n.trees = 100, interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.001, bag.fraction = 0.5, train.fraction = 1.0, cv.folds=0, keep.data = TRUE, verbose = 'CV', class.stratify.cv=NULL, n.cores=NULL){ theCall <- match.call() lVerbose <- if (!is.logical(verbose)) { FALSE } else { verbose } mf <- match.call(expand.dots = FALSE) m <- match(c("formula", "data", "weights", "offset"), names(mf), 0) mf <- mf[c(1, m)] mf$drop.unused.levels <- TRUE mf$na.action <- na.pass mf[[1]] <- as.name("model.frame") m <- mf mf <- eval(mf, parent.frame()) Terms <- attr(mf, "terms") y <- model.response(mf) if (missing(distribution)){ distribution <- guessDist(y) } else if (is.character(distribution)){ distribution <- list(name=distribution) } w <- model.weights(mf) offset <- model.offset(mf) var.names <- attributes(Terms)$term.labels x <- model.frame(terms(reformulate(var.names)), data, na.action=na.pass) # get the character name of the response variable response.name <- as.character(formula[[2]]) lVerbose <- if (!is.logical(verbose)) { FALSE } else { verbose } class.stratify.cv <- getStratify(class.stratify.cv, distribution) # groups (for pairwise distribution only) group <- NULL num.groups <- 0 # determine number of training instances if (distribution$name != "pairwise"){ nTrain <- floor(train.fraction * nrow(x)) } else { # distribution$name == "pairwise": # Sampling is by group, so we need to calculate them here distribution.group <- distribution[["group"]] if (is.null(distribution.group)) { stop("For pairwise regression, the distribution parameter must be a list with a parameter 'group' for the a list of the column names indicating groups, for example 
list(name=\"pairwise\",group=c(\"date\",\"session\",\"category\",\"keywords\")).") } # Check if group names are valid i <- match(distribution.group, colnames(data)) if (any(is.na(i))) { stop("Group column does not occur in data: ", distribution.group[is.na(i)]) } # Construct group index group <- factor(do.call(paste, c(data[,distribution.group, drop=FALSE], sep=":"))) # Check that weights are constant across groups if ((!missing(weights)) && (!is.null(weights))) { w.min <- tapply(w, INDEX=group, FUN=min) w.max <- tapply(w, INDEX=group, FUN=max) if (any(w.min != w.max)) { stop("For distribution 'pairwise', all instances for the same group must have the same weight") } # Normalize across groups w <- w * length(w.min) / sum(w.min) } # Shuffle groups, to remove bias when splitting into train/test set and/or CV folds perm.levels <- levels(group)[sample(1:nlevels(group))] group <- factor(group, levels=perm.levels) # The C function expects instances to be sorted by group and descending by target ord.group <- order(group, -y) group <- group[ord.group] y <- y[ord.group] x <- x[ord.group,,drop=FALSE] w <- w[ord.group] # Split into train and validation set, at group boundary num.groups.train <- max(1, round(train.fraction * nlevels(group))) # include all groups up to the num.groups.train nTrain <- max(which(group==levels(group)[num.groups.train])) Misc <- group } # close if(distribution$name=="coxph") ... cv.error <- NULL if(cv.folds>1) { cv.results <- gbmCrossVal(cv.folds, nTrain, n.cores, class.stratify.cv, data, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group) cv.error <- cv.results$error p <- cv.results$predictions } # Close if(cv.folds > 1 gbm.obj <- gbm.fit(x,y, offset = offset, distribution = distribution, w = w, var.monotone = var.monotone, n.trees = n.trees, interaction.depth = interaction.depth, n.minobsinnode = n.minobsinnode, shrinkage = shrinkage, bag.fraction = bag.fraction, nTrain = nTrain, keep.data = keep.data, verbose = lVerbose, var.names = var.names, response.name = response.name, group = group) gbm.obj$train.fraction <- train.fraction gbm.obj$Terms <- Terms gbm.obj$cv.error <- cv.error gbm.obj$cv.folds <- cv.folds gbm.obj$call <- theCall gbm.obj$m <- m if (cv.folds > 0){ gbm.obj$cv.fitted <- p } if (distribution$name == "pairwise") { # Data has been reordered according to queries. # We need to permute the fitted values to correspond # to the original order. gbm.obj$ord.group <- ord.group gbm.obj$fit <- gbm.obj$fit[order(ord.group)] } return(gbm.obj) } gbm/R/permutation.test.gbm.R0000644000176000001440000000320112102666411015457 0ustar ripleyuserspermutation.test.gbm <- function(object, n.trees) { # get variables used in the model i.vars <- sort(unique(unlist(lapply(object$trees[1:n.trees], function(x){unique(x[[1]])})))) i.vars <- i.vars[i.vars!=-1] + 1 rel.inf <- rep(0,length(object$var.names)) if(!is.null(object$data)) { y <- object$data$y os <- object$data$offset Misc <- object$data$Misc w <- object$data$w x <- matrix(object$data$x, ncol=length(object$var.names)) object$Terms <- NULL # this makes predict.gbm take x as it is if (object$distribution$name == "pairwise") { # group and cutoff are only relevant for distribution "pairwise" # in this case, the last element specifies the max rank # max rank = 0 means no cut off group <- Misc[1:length(y)] max.rank <- Misc[length(y)+1] } } else { stop("Model was fit with keep.data=FALSE. 
permutation.test.gbm has not been implemented for that case.") } # the index shuffler j <- sample(1:nrow(x)) for(i in 1:length(i.vars)) { x[ ,i.vars[i]] <- x[j,i.vars[i]] new.pred <- predict.gbm(object,newdata=x,n.trees=n.trees) rel.inf[i.vars[i]] <- gbm.loss(y,new.pred,w,os, object$distribution, object$train.error[n.trees], group, max.rank) x[j,i.vars[i]] <- x[ ,i.vars[i]] } return(rel.inf=rel.inf) } gbm/R/gbm.fit.R0000644000176000001440000003145412131277636012740 0ustar ripleyusersgbm.fit <- function(x,y, offset = NULL, misc = NULL, distribution = "bernoulli", w = NULL, var.monotone = NULL, n.trees = 100, interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.001, bag.fraction = 0.5, nTrain = NULL, train.fraction = NULL, keep.data = TRUE, verbose = TRUE, var.names = NULL, response.name = "y", group = NULL) { if(is.character(distribution)) { distribution <- list(name=distribution) } cRows <- nrow(x) cCols <- ncol(x) if(nrow(x) != ifelse(class(y)=="Surv", nrow(y), length(y))) { stop("The number of rows in x does not equal the length of y.") } # the preferred way to specify the number of training instances is via parameter 'nTrain'. # parameter 'train.fraction' is only maintained for backward compatibility. if(!is.null(nTrain) && !is.null(train.fraction)) { stop("Parameters 'nTrain' and 'train.fraction' cannot both be specified") } else if(!is.null(train.fraction)) { warning("Parameter 'train.fraction' of gbm.fit is deprecated, please specify 'nTrain' instead") nTrain <- floor(train.fraction*cRows) } else if(is.null(nTrain)) { # both undefined, use all training data nTrain <- cRows } if (is.null(train.fraction)){ train.fraction <- nTrain / cRows } if(is.null(var.names)) { var.names <- getVarNames(x) } # if(is.null(response.name)) { response.name <- "y" } # check dataset size if(nTrain * bag.fraction <= 2*n.minobsinnode+1) { stop("The dataset size is too small or subsampling rate is too large: nTrain*bag.fraction <= n.minobsinnode") } if (distribution$name != "pairwise") { w <- w*length(w)/sum(w) # normalize to N } # Do sanity checks ch <- checkMissing(x, y) interaction.depth <- checkID(interaction.depth) w <- checkWeights(w, length(y)) offset <- checkOffset(offset, y) Misc <- NA # setup variable types var.type <- rep(0,cCols) var.levels <- vector("list",cCols) for(i in 1:length(var.type)) { if(all(is.na(x[,i]))) { stop("variable ",i,": ",var.names[i]," has only missing values.") } if(is.ordered(x[,i])) { var.levels[[i]] <- levels(x[,i]) x[,i] <- as.numeric(x[,i])-1 var.type[i] <- 0 } else if(is.factor(x[,i])) { if(length(levels(x[,i]))>1024) stop("gbm does not currently handle categorical variables with more than 1024 levels. 
Variable ",i,": ",var.names[i]," has ",length(levels(x[,i]))," levels.") var.levels[[i]] <- levels(x[,i]) x[,i] <- as.numeric(x[,i])-1 var.type[i] <- max(x[,i],na.rm=TRUE)+1 } else if(is.numeric(x[,i])) { var.levels[[i]] <- quantile(x[,i],prob=(0:10)/10,na.rm=TRUE) } else { stop("variable ",i,": ",var.names[i]," is not of type numeric, ordered, or factor.") } # check for some variation in each variable if(length(unique(var.levels[[i]])) == 1) { warning("variable ",i,": ",var.names[i]," has no variation.") } } nClass <- 1 if(!("name" %in% names(distribution))) { stop("The distribution is missing a 'name' component, for example list(name=\"gaussian\")") } supported.distributions <- c("bernoulli","gaussian","poisson","adaboost","laplace","coxph","quantile", "tdist", "multinomial", "huberized", "pairwise") distribution.call.name <- distribution$name # check potential problems with the distributions if(!is.element(distribution$name,supported.distributions)) { stop("Distribution ",distribution$name," is not supported") } if((distribution$name == "bernoulli") && !all(is.element(y,0:1))) { stop("Bernoulli requires the response to be in {0,1}") } if((distribution$name == "huberized") && !all(is.element(y,0:1))) { stop("Huberized square hinged loss requires the response to be in {0,1}") } if((distribution$name == "poisson") && any(y<0)) { stop("Poisson requires the response to be positive") } if((distribution$name == "poisson") && any(y != trunc(y))) { stop("Poisson requires the response to be a positive integer") } if((distribution$name == "adaboost") && !all(is.element(y,0:1))) { stop("This version of AdaBoost requires the response to be in {0,1}") } if(distribution$name == "quantile") { if(length(unique(w)) > 1) { stop("This version of gbm for the quantile regression lacks a weighted quantile. 
For now the weights must be constant.") } if(is.null(distribution$alpha)) { stop("For quantile regression, the distribution parameter must be a list with a parameter 'alpha' indicating the quantile, for example list(name=\"quantile\",alpha=0.95).") } else if((distribution$alpha<0) || (distribution$alpha>1)) { stop("alpha must be between 0 and 1.") } Misc <- c(alpha=distribution$alpha) } if(distribution$name == "coxph") { if(class(y)!="Surv") { stop("Outcome must be a survival object Surv(time,failure)") } if(attr(y,"type")!="right") { stop("gbm() currently only handles right censored observations") } Misc <- y[,2] y <- y[,1] # reverse sort the failure times to compute risk sets on the fly i.train <- order(-y[1:nTrain]) n.test <- cRows - nTrain if(n.test > 0) { i.test <- order(-y[(nTrain+1):cRows]) + nTrain } else { i.test <- NULL } i.timeorder <- c(i.train,i.test) y <- y[i.timeorder] Misc <- Misc[i.timeorder] x <- x[i.timeorder,,drop=FALSE] w <- w[i.timeorder] if(!is.na(offset)) offset <- offset[i.timeorder] } if(distribution$name == "tdist") { if (is.null(distribution$df) || !is.numeric(distribution$df)){ Misc <- 4 } else { Misc <- distribution$df[1] } } if (distribution$name == "multinomial") { ## Ensure that the training set contains all classes classes <- attr(factor(y), "levels") nClass <- length(classes) if (nClass > nTrain){ stop(paste("Number of classes (", nClass, ") must be less than the size of the training set (", nTrain, ")", sep = "")) } # f <- function(a,x){ # min((1:length(x))[x==a]) # } new.idx <- as.vector(sapply(classes, function(a,x){ min((1:length(x))[x==a]) }, y)) all.idx <- 1:length(y) new.idx <- c(new.idx, all.idx[!(all.idx %in% new.idx)]) y <- y[new.idx] x <- x[new.idx, ] w <- w[new.idx] if (!is.null(offset)){ offset <- offset[new.idx] } ## Get the factors y <- as.numeric(as.vector(outer(y, classes, "=="))) ## Fill out the weight and offset w <- rep(w, nClass) if (!is.null(offset)){ offset <- rep(offset, nClass) } } # close if (dist... == "multinomial" if(distribution$name == "pairwise") { distribution.metric <- distribution[["metric"]] if (!is.null(distribution.metric)) { distribution.metric <- tolower(distribution.metric) supported.metrics <- c("conc", "ndcg", "map", "mrr") if (!is.element(distribution.metric, supported.metrics)) { stop("Metric '", distribution.metric, "' is not supported, use either 'conc', 'ndcg', 'map', or 'mrr'") } metric <- distribution.metric } else { warning("No metric specified, using 'ndcg'") metric <- "ndcg" # default distribution[["metric"]] <- metric } if (any(y<0)) { stop("targets for 'pairwise' should be non-negative") } if (is.element(metric, c("mrr", "map")) && (!all(is.element(y, 0:1)))) { stop("Metrics 'map' and 'mrr' require the response to be in {0,1}") } # Cut-off rank for metrics # Default of 0 means no cutoff max.rank <- 0 if (!is.null(distribution[["max.rank"]]) && distribution[["max.rank"]] > 0) { if (is.element(metric, c("ndcg", "mrr"))) { max.rank <- distribution[["max.rank"]] } else { stop("Parameter 'max.rank' cannot be specified for metric '", distribution.metric, "', only supported for 'ndcg' and 'mrr'") } } # We pass the cut-off rank to the C function as the last element in the Misc vector Misc <- c(group, max.rank) distribution.call.name <- sprintf("pairwise_%s", metric) } # close if (dist... == "pairwise" # create index upfront... 
subtract one for 0 based order x.order <- apply(x[1:nTrain,,drop=FALSE],2,order,na.last=FALSE)-1 x <- as.vector(data.matrix(x)) predF <- rep(0,length(y)) train.error <- rep(0,n.trees) valid.error <- rep(0,n.trees) oobag.improve <- rep(0,n.trees) if(is.null(var.monotone)) var.monotone <- rep(0,cCols) else if(length(var.monotone)!=cCols) { stop("Length of var.monotone != number of predictors") } else if(!all(is.element(var.monotone,-1:1))) { stop("var.monotone must be -1, 0, or 1") } fError <- FALSE gbm.obj <- .Call("gbm", Y=as.double(y), Offset=as.double(offset), X=as.double(x), X.order=as.integer(x.order), weights=as.double(w), Misc=as.double(Misc), cRows=as.integer(cRows), cCols=as.integer(cCols), var.type=as.integer(var.type), var.monotone=as.integer(var.monotone), distribution=as.character(distribution.call.name), n.trees=as.integer(n.trees), interaction.depth=as.integer(interaction.depth), n.minobsinnode=as.integer(n.minobsinnode), n.classes = as.integer(nClass), shrinkage=as.double(shrinkage), bag.fraction=as.double(bag.fraction), nTrain=as.integer(nTrain), fit.old=as.double(NA), n.cat.splits.old=as.integer(0), n.trees.old=as.integer(0), verbose=as.integer(verbose), PACKAGE = "gbm") names(gbm.obj) <- c("initF","fit","train.error","valid.error", "oobag.improve","trees","c.splits") gbm.obj$bag.fraction <- bag.fraction gbm.obj$distribution <- distribution gbm.obj$interaction.depth <- interaction.depth gbm.obj$n.minobsinnode <- n.minobsinnode gbm.obj$num.classes <- nClass gbm.obj$n.trees <- length(gbm.obj$trees) / nClass gbm.obj$nTrain <- nTrain gbm.obj$train.fraction <- train.fraction gbm.obj$response.name <- response.name gbm.obj$shrinkage <- shrinkage gbm.obj$var.levels <- var.levels gbm.obj$var.monotone <- var.monotone gbm.obj$var.names <- var.names gbm.obj$var.type <- var.type gbm.obj$verbose <- verbose gbm.obj$Terms <- NULL if(distribution$name == "coxph") { gbm.obj$fit[i.timeorder] <- gbm.obj$fit } ## If K-Classification is used then split the fit and tree components if (distribution$name == "multinomial"){ gbm.obj$fit <- matrix(gbm.obj$fit, ncol = nClass) dimnames(gbm.obj$fit)[[2]] <- classes gbm.obj$classes <- classes ## Also get the class estimators exp.f <- exp(gbm.obj$fit) denom <- matrix(rep(rowSums(exp.f), nClass), ncol = nClass) gbm.obj$estimator <- exp.f/denom } if(keep.data) { if(distribution$name == "coxph") { # put the observations back in order gbm.obj$data <- list(y=y,x=x,x.order=x.order,offset=offset,Misc=Misc,w=w, i.timeorder=i.timeorder) } else if ( distribution$name == "multinomial" ){ # Restore original order of the data new.idx <- order( new.idx ) gbm.obj$data <- list( y=as.vector(matrix(y, ncol=length(classes),byrow=FALSE)[new.idx,]), x=as.vector(matrix(x, ncol=length(var.names), byrow=FALSE)[new.idx,]), x.order=x.order, offset=offset[new.idx], Misc=Misc, w=w[new.idx] ) } else { gbm.obj$data <- list(y=y,x=x,x.order=x.order,offset=offset,Misc=Misc,w=w) } } else { gbm.obj$data <- NULL } class(gbm.obj) <- "gbm" return(gbm.obj) } gbm/R/checks.R0000644000176000001440000000251012131277224012632 0ustar ripleyuserscheckMissing <- function(x, y){ nms <- getVarNames(x) #### Check for NaNs in x and NAs in response j <- apply(x, 2, function(z) any(is.nan(z))) if(any(j)) { stop("Use NA for missing values. 
NaN found in predictor variables:", paste(nms[j],collapse=",")) } if(any(is.na(y))) stop("Missing values are not allowed in the response") invisible(NULL) } checkID <- function(id){ # Check for disallowed interaction.depth if(id < 1) { stop("interaction.depth must be at least 1.") } else if(id > 49) { stop("interaction.depth must be less than 50. You should also ask yourself why you want such large interaction terms. A value between 1 and 5 should be sufficient for most applications.") } invisible(id) } checkWeights <- function(w, n){ # Logical checks on weights if(length(w)==0) { w <- rep(1, n) } else if(any(w < 0)) stop("negative weights not allowed") w } checkOffset <- function(o, y){ # Check offset if(is.null(o) | all(o==0)) { o <- NA } else if(length(o) != length(y)) { stop("The length of offset does not equal the length of y.") } o } getVarNames <- function(x){ if(is.matrix(x)) { var.names <- colnames(x) } else if(is.data.frame(x)) { var.names <- names(x) } else { var.names <- paste("X",1:ncol(x),sep="") } var.names } gbm/R/basehaz.gbm.R0000644000176000001440000000206512131261772013561 0ustar ripleyusers# compute Breslow estimator of the baseline hazard function basehaz.gbm <- function(t,delta,f.x, t.eval=NULL, smooth=FALSE, cumulative=TRUE) { t.unique <- sort(unique(t[delta==1])) alpha <- length(t.unique) for(i in 1:length(t.unique)) { alpha[i] <- sum(t[delta==1]==t.unique[i])/ sum(exp(f.x[t>=t.unique[i]])) } if(!smooth && !cumulative) { if(!is.null(t.eval)) { stop("Cannot evaluate unsmoothed baseline hazard at t.eval.") } } else if(smooth && !cumulative) { lambda.smooth <- supsmu(t.unique,alpha) } else if(smooth && cumulative) { lambda.smooth <- supsmu(t.unique,cumsum(alpha)) } else # (!smooth && cumulative) - THE DEFAULT { lambda.smooth <- list(x=t.unique,y=cumsum(alpha)) } if(!is.null(t.eval)) { obj <- approx(lambda.smooth$x,lambda.smooth$y,xout=t.eval)$y } else { obj <- approx(lambda.smooth$x,lambda.smooth$y,xout=t)$y } return(obj) } gbm/R/reconstructGBMdata.R0000644000176000001440000000240612134234270015126 0ustar ripleyusersreconstructGBMdata <- function(x) { if(class(x) != "gbm") { stop( "This function is for use only with objects having class 'gbm'" ) } else if (is.null(x$data)) { stop("Cannot reconstruct data from gbm object. gbm() was called with keep.data=FALSE") } else if (x$distribution$name=="multinomial") { y <- matrix(x$data$y, ncol=x$num.classes, byrow=FALSE) yn <- apply(y, 1, function(z,nc) {(1:nc)[z == 1]}, nc = x$num.classes) y <- factor(yn, labels=x$classes) xdat <- matrix(x$data$x, ncol=ncol(x$data$x.order), byrow=FALSE) d <- data.frame(y, xdat) names(d) <- c(x$response.name, x$var.names) } else if (x$distribution$name == "coxph") { xdat <- matrix(x$data$x, ncol=ncol(x$data$x.order), byrow=FALSE) status <- x$data$Misc y <- x$data$y[order(x$data$i.timeorder)] d <- data.frame(y, status, xdat) names(d) <- c(x$response.name[-1], colnames(x$data$x.order)) } else { y <- x$data$y xdat <- matrix(x$data$x, ncol=ncol(x$data$x.order), byrow=FALSE) d <- data.frame(y, xdat) rn <- ifelse(length(x$response.name) > 1, x$response.name[2], x$response.name) names(d) <- c(rn, colnames(x$data$x.order)) } invisible(d) } gbm/R/relative.influence.R0000644000176000001440000000302112134211007015140 0ustar ripleyusersrelative.influence <- function(object, n.trees, scale. = FALSE, sort. 
= FALSE ) { if( missing( n.trees ) ){ if ( object$train.fraction < 1 ){ n.trees <- gbm.perf( object, method="test", plot.it=FALSE ) } else if ( !is.null( object$cv.error ) ){ n.trees <- gbm.perf( object, method="cv", plot.it = FALSE ) } else{ # If dist=multinomial, object$n.trees = n.trees * num.classes # so use the following instead. n.trees <- length( object$train.error ) } cat( paste( "n.trees not given. Using", n.trees, "trees.\n" ) ) if (object$distribution == "multinomial"){ n.trees <- n.trees * object$num.classes } } get.rel.inf <- function(obj) { lapply(split(obj[[6]],obj[[1]]),sum) # 6 - Improvement, 1 - var name } temp <- unlist(lapply(object$trees[1:n.trees],get.rel.inf)) rel.inf.compact <- unlist(lapply(split(temp,names(temp)),sum)) rel.inf.compact <- rel.inf.compact[names(rel.inf.compact)!="-1"] # rel.inf.compact excludes those variable that never entered the model # insert 0's for the excluded variables rel.inf <- rep(0,length(object$var.names)) i <- as.numeric(names(rel.inf.compact))+1 rel.inf[i] <- rel.inf.compact names(rel.inf) <- object$var.names if (scale.){ rel.inf <- rel.inf / max(rel.inf) } if (sort.){ rel.inf <- rev(sort(rel.inf)) } return(rel.inf=rel.inf) } gbm/R/gbm.more.R0000644000176000001440000002212312102666411013100 0ustar ripleyusersgbm.more <- function(object, n.new.trees = 100, data = NULL, weights = NULL, offset = NULL, verbose = NULL) { theCall <- match.call() nTrain <- object$nTrain if (object$distribution$name != "pairwise") { distribution.call.name <- object$distribution$name } else { distribution.call.name <- sprintf("pairwise_%s", object$distribution$metric) } if(is.null(object$Terms) && is.null(object$data)) { stop("The gbm model was fit using gbm.fit (rather than gbm) and keep.data was set to FALSE. gbm.more cannot locate the dataset.") } else if(is.null(object$data) && is.null(data)) { stop("keep.data was set to FALSE on original gbm call and argument 'data' is NULL") } else if(is.null(object$data)) { m <- eval(object$m, parent.frame()) Terms <- attr(m, "terms") a <- attributes(Terms) y <- as.vector(model.extract(m, "response")) offset <- model.extract(m,offset) x <- model.frame(delete.response(Terms), data, na.action=na.pass) w <- weights if(length(w)==0) w <- rep(1, nrow(x)) if (object$distribution$name != "pairwise") { w <- w*length(w)/sum(w) # normalize to N } if(is.null(offset) || (offset==0)) { offset <- NA } Misc <- NA if(object$distribution$name == "coxph") { Misc <- as.numeric(y)[-(1:cRows)] y <- as.numeric(y)[1:cRows] # reverse sort the failure times to compute risk sets on the fly i.train <- order(-y[1:nTrain]) i.test <- order(-y[(nTrain+1):cRows]) + nTrain i.timeorder <- c(i.train,i.test) y <- y[i.timeorder] Misc <- Misc[i.timeorder] x <- x[i.timeorder,,drop=FALSE] w <- w[i.timeorder] if(!is.na(offset)) offset <- offset[i.timeorder] object$fit <- object$fit[i.timeorder] } else if(object$distribution$name == "tdist" ){ Misc <- object$distribution$df } else if (object$distribution$name == "pairwise"){ # Check if group names are valid distribution.group <- object$distribution$group i <- match(distribution.group, colnames(data)) if (any(is.na(i))) { stop("Group column does not occur in data: ", distribution.group[is.na(i)]) } # construct group index group <- factor(do.call(paste, c(data[,distribution.group, drop=FALSE], sep=":"))) # Check that weights are constant across groups if ((!missing(weights)) && (!is.null(weights))) { w.min <- tapply(w, INDEX=group, FUN=min) w.max <- tapply(w, INDEX=group, FUN=max) if (any(w.min != w.max)) { 
stop("For distribution 'pairwise', all instances for the same group must have the same weight")
         }
         # Normalize across groups
         w <- w * length(w.min) / sum(w.min)
      }

      # Shuffle groups, to remove bias when splitting into train/test set and/or CV folds
      perm.levels <- levels(group)[sample(1:nlevels(group))]
      group <- factor(group, levels=perm.levels)

      # The C function expects instances to be sorted by group and descending by target
      ord.group <- object$ord.group
      group <- group[ord.group]
      y <- y[ord.group]
      x <- x[ord.group,,drop=FALSE]
      w <- w[ord.group]
      object$fit <- object$fit[ord.group] # object$fit is stored in the original order

      # Split into train and validation set, at group boundary
      num.groups.train <- max(1, round(object$train.fraction * nlevels(group)))

      # include all groups up to the num.groups.train
      nTrain <- max(which(group==levels(group)[num.groups.train]))

      metric <- object$distribution[["metric"]]

      if (is.element(metric, c("mrr", "map")) && (!all(is.element(y, 0:1))))
      {
         stop("Metrics 'map' and 'mrr' require the response to be in {0,1}")
      }

      # Cut-off rank for metrics
      # We pass this argument as the last element in the Misc vector
      # Default of 0 means no cutoff
      max.rank <- 0
      if (!is.null(object$distribution[["max.rank"]]) && object$distribution[["max.rank"]] > 0)
      {
         if (is.element(metric, c("ndcg", "mrr")))
         {
            max.rank <- object$distribution[["max.rank"]]
         }
         else
         {
            stop("Parameter 'max.rank' cannot be specified for metric '", metric, "', only supported for 'ndcg' and 'mrr'")
         }
      }

      Misc <- c(group, max.rank)
   }

   # create index upfront... subtract one for 0 based order
   x.order <- apply(x[1:nTrain,,drop=FALSE],2,order,na.last=FALSE)-1
   x <- data.matrix(x)
   cRows <- nrow(x)
   cCols <- ncol(x)
}
else
{
   y <- object$data$y
   x <- object$data$x
   x.order <- object$data$x.order
   offset <- object$data$offset
   Misc <- object$data$Misc
   w <- object$data$w
   nTrain <- object$nTrain
   cRows <- length(y)
   cCols <- length(x)/cRows
   if(object$distribution$name == "coxph")
   {
      i.timeorder <- object$data$i.timeorder
      object$fit <- object$fit[i.timeorder]
   }
   if (object$distribution$name == "pairwise")
   {
      object$fit <- object$fit[object$ord.group] # object$fit is stored in the original order
   }
}

if(is.null(verbose))
{
   verbose <- object$verbose
}
x <- as.vector(x)

gbm.obj <- .Call("gbm",
                 Y = as.double(y),
                 Offset = as.double(offset),
                 X = as.double(x),
                 X.order = as.integer(x.order),
                 weights = as.double(w),
                 Misc = as.double(Misc),
                 cRows = as.integer(cRows),
                 cCols = as.integer(cCols),
                 var.type = as.integer(object$var.type),
                 var.monotone = as.integer(object$var.monotone),
                 distribution = as.character(distribution.call.name),
                 n.trees = as.integer(n.new.trees),
                 interaction.depth = as.integer(object$interaction.depth),
                 n.minobsinnode = as.integer(object$n.minobsinnode),
                 n.classes = as.integer(object$num.classes),
                 shrinkage = as.double(object$shrinkage),
                 bag.fraction = as.double(object$bag.fraction),
                 train.fraction = as.integer(nTrain),
                 fit.old = as.double(object$fit),
                 n.cat.splits.old = as.integer(length(object$c.splits)),
                 n.trees.old = as.integer(object$n.trees),
                 verbose = as.integer(verbose),
                 PACKAGE = "gbm")
names(gbm.obj) <- c("initF","fit","train.error","valid.error",
                    "oobag.improve","trees","c.splits")

gbm.obj$initF <- object$initF
gbm.obj$train.error <- c(object$train.error, gbm.obj$train.error)
gbm.obj$valid.error <- c(object$valid.error, gbm.obj$valid.error)
gbm.obj$oobag.improve <- c(object$oobag.improve, gbm.obj$oobag.improve)
gbm.obj$trees <- c(object$trees, gbm.obj$trees)
gbm.obj$c.splits <- c(object$c.splits, gbm.obj$c.splits)

# cv.error 
not updated when using gbm.more gbm.obj$cv.error <- object$cv.error gbm.obj$cv.folds <- object$cv.folds gbm.obj$n.trees <- length(gbm.obj$trees) gbm.obj$distribution <- object$distribution gbm.obj$train.fraction <- object$train.fraction gbm.obj$shrinkage <- object$shrinkage gbm.obj$bag.fraction <- object$bag.fraction gbm.obj$var.type <- object$var.type gbm.obj$var.monotone <- object$var.monotone gbm.obj$var.names <- object$var.names gbm.obj$interaction.depth <- object$interaction.depth gbm.obj$n.minobsinnode <- object$n.minobsinnode gbm.obj$num.classes <- object$num.classes gbm.obj$nTrain <- object$nTrain gbm.obj$response.name <- object$response.name gbm.obj$Terms <- object$Terms gbm.obj$var.levels <- object$var.levels gbm.obj$verbose <- verbose if(object$distribution$name == "coxph") { gbm.obj$fit[i.timeorder] <- gbm.obj$fit } if (object$distribution$name == "pairwise") { # Data has been reordered according to queries. # We need to permute the fitted values to correspond # to the original order. gbm.obj$fit <- gbm.obj$fit[order(object$ord.group)] object$fit <- object$fit[order(object$ord.group)] gbm.obj$ord.group <- object$ord.group } if(!is.null(object$data)) { gbm.obj$data <- object$data } else { gbm.obj$data <- NULL } gbm.obj$m <- object$m gbm.obj$call <- theCall class(gbm.obj) <- "gbm" return(gbm.obj) } gbm/R/shrink.gbm.pred.R0000644000176000001440000000352212102666411014367 0ustar ripleyusersshrink.gbm.pred <- function(object,newdata,n.trees, lambda=rep(1,length(object$var.names)), ...) { if(length(lambda) != length(object$var.names)) { stop("lambda must have the same length as the number of variables in the gbm object.") } if(!is.null(object$Terms)) { x <- model.frame(delete.response(object$Terms), newdata, na.action=na.pass) } else { x <- newdata } cRows <- nrow(x) cCols <- ncol(x) for(i in 1:cCols) { if(is.factor(x[,i])) { j <- match(levels(x[,i]), object$var.levels[[i]]) if(any(is.na(j))) { stop(paste("New levels for variable ", object$var.names[i],": ", levels(x[,i])[is.na(j)],sep="")) } x[,i] <- as.numeric(x[,i])-1 } } x <- as.vector(unlist(x)) if(missing(n.trees) || any(n.trees > object$n.trees)) { n.trees <- n.trees[n.trees<=object$n.trees] if(length(n.trees)==0) n.trees <- object$n.trees warning("n.trees not specified or some values exceeded number fit so far. Using ",n.trees,".") } # sort n.trees so that predictions are easier to generate and store n.trees <- sort(n.trees) predF <- .Call("gbm_shrink_pred", X=as.double(x), cRows=as.integer(cRows), cCols=as.integer(cCols), n.trees=as.integer(n.trees), initF=object$initF, trees=object$trees, c.split=object$c.split, var.type=as.integer(object$var.type), depth=as.integer(object$interaction.depth), lambda=as.double(lambda), PACKAGE = "gbm") return(predF) } gbm/R/ir.measures.R0000644000176000001440000000644312102666411013636 0ustar ripleyusers# Functions to compute IR measures for pairwise loss for # a single group # Notes: # * Inputs are passed as a 2-elemen (y,f) list, to # facilitate the 'by' iteration # * Return the respective metric, or a negative value if # it is undefined for the given group # * For simplicity, we have no special handling for ties; # instead, we break ties randomly. This is slightly # inaccurate for individual groups, but should have # a small effect on the overall measure. 
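# A small worked example of the ranking helpers below (hypothetical values):
#   gbm.roc.area(obs = c(0, 1, 1, 0), pred = c(0.2, 0.9, 0.6, 0.4))
# returns 1, since both positive instances are scored above both negatives.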
# Area under ROC curve = fraction of correctly ranked pairs gbm.roc.area <- function(obs, pred) { n1 <- sum(obs) n <- length(obs) if (n==n1) { return(1) } # Fraction of concordant pairs # = sum_{pos}(rank-1) / #pairs with different labels # #pairs = n1 * (n-n1) return ((mean(rank(pred)[obs > 0]) - (n1 + 1)/2)/(n - n1)) } # Concordance Index: # Fraction of all pairs (i,j) with different labels that are ranked in the # correct order; for binary labels this equals the area under the ROC curve. ir.measure.auc <- function(y.f, max.rank=0) { # Note: max.rank is meaningless for AUC y <- y.f[[1]] f <- y.f[[2]] num.pos <- sum(y>0) if (length(f) <= 1 || num.pos == 0 || num.pos == length(f)) { return (-1.0) } else { return (gbm.roc.area(obs=y, pred=f)) } } ir.measure.mrr <- function(y.f, max.rank) { y <- y.f[[1]] f <- y.f[[2]] num.pos <- sum(y>0) if (length(f) <= 1 || num.pos == 0 || num.pos == length(f)) { return (-1.0) } ord <- order(f, decreasing=TRUE) min.idx.pos <- min(which(y[ord]>0)) if (min.idx.pos <= max.rank) { return (1.0 / min.idx.pos) } else { return (0.0) } } ir.measure.map <- function(y.f, max.rank=0) { # Note: max.rank is meaningless for MAP y <- y.f[[1]] f <- y.f[[2]] ord <- order(f, decreasing=TRUE) idx.pos <- which(y[ord]>0) num.pos <- length(idx.pos) if (length(f) <= 1 || num.pos == 0 || num.pos == length(f)) { return (-1.0) } # Above and including the rank of the i-th positive result, # there are exactly i positives and rank(i) total results return (sum((1:length(idx.pos))/idx.pos) / num.pos) } ir.measure.ndcg <- function(y.f, max.rank) { y <- y.f[[1]] f <- y.f[[2]] if (length(f) <= 1 || all(diff(y)==0)) { return (-1.0) } num.items <- min(length(f), max.rank) ord <- order(f, decreasing=TRUE) dcg <- sum(y[ord][1:num.items] / log2(2:(num.items+1))) # The best possible DCG: order by target ord.max <- order(y, decreasing=TRUE) dcg.max <- sum(y[ord.max][1:num.items] / log2(2:(num.items+1))) # Normalize return (dcg / dcg.max) } gbm/R/gbmCrossVal.R0000644000176000001440000001020512134211007013602 0ustar ripleyusers##' Perform gbm cross-validation ##' ##' This function has far too many arguments, but there isn't the ##' abstraction in gbm to lose them.
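##'
##' Sketch of the flow (all names are defined in this file; internal API):
##'   cv <- gbmCrossVal(cv.folds, nTrain, n.cores, ...)
##'   cv$error        # CV error per boosting iteration
##'   cv$predictions  # held-out predictions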
gbmCrossVal <- function(cv.folds, nTrain, n.cores, class.stratify.cv, data, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group) { i.train <- 1:nTrain cv.group <- getCVgroup(distribution, class.stratify.cv, y, i.train, cv.folds, group) ## build the models cv.models <- gbmCrossValModelBuild(cv.folds, cv.group, n.cores, i.train, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group) ## get the errors cv.error <- gbmCrossValErr(cv.models, cv.folds, cv.group, nTrain, n.trees) best.iter.cv <- which.min(cv.error) ## get the predictions predictions <- gbmCrossValPredictions(cv.models, cv.folds, cv.group, best.iter.cv, distribution, data[i.train,], y) list(error=cv.error, predictions=predictions) } ##' Get the gbm cross-validation error gbmCrossValErr <- function(cv.models, cv.folds, cv.group, nTrain, n.trees) { in.group <- tabulate(cv.group, nbins=cv.folds) cv.error <- vapply(1:cv.folds, function(index) { model <- cv.models[[index]] model$valid.error * in.group[[index]] }, double(n.trees)) ## this is now a (n.trees, cv.folds) matrix ## and now a n.trees vector rowSums(cv.error) / nTrain } ##' Get the predictions for GBM cross validation ##' ##' This function is not as nice as it could be (leakage of y) gbmCrossValPredictions <- function(cv.models, cv.folds, cv.group, best.iter.cv, distribution, data, y) { ## test cv.group and data match if (nrow(data) != length(cv.group)) { stop("mismatch between data and cv.group") } ## this is a little complicated due to multinomial distribution num.cols <- if (distribution$name == "multinomial") { nlevels(factor(y)) } else { 1 } result <- matrix(nrow=nrow(data), ncol=num.cols) ## there's no real reason to do this as other than a for loop data.names <- names(data) for (ind in 1:cv.folds) { ## these are the particular elements flag <- cv.group == ind model <- cv.models[[ind]] ## the %in% here is to handle coxph my.data <- data[flag, !(data.names %in% model$response.name)] predictions <- predict(model, newdata=my.data, n.trees=best.iter.cv) predictions <- matrix(predictions, ncol=num.cols) result[flag,] <- predictions } if (distribution$name != "multinomial") { result <- as.numeric(result) } result } ##' Perform gbm cross-validation ##' ##' This function has far too many arguments. 
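##' It builds one gbm per cross-validation fold, in parallel, via
##' parLapply on a cluster created by gbmCluster, and returns the
##' resulting list of fold models.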
gbmCrossValModelBuild <- function(cv.folds, cv.group, n.cores, i.train, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group) { ## set up the cluster and add a finalizer cluster <- gbmCluster(n.cores) on.exit(stopCluster(cluster)) ## get ourselves some random seeds seeds <- as.integer(runif(cv.folds, -(2^31 - 1), 2^31)) ## now do the cross-validation model builds parLapply(cl=cluster, X=1:cv.folds, gbmDoFold, i.train, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, cv.group, var.names, response.name, group, seeds) } gbm/R/guessDist.R0000644000176000001440000000052712102666411013350 0ustar ripleyusersguessDist <- function(y){ # If distribution is not given, try to guess it if (length(unique(y)) == 2){ d <- "bernoulli" } else if (class(y) == "Surv" ){ d <- "coxph" } else if (is.factor(y)){ d <- "multinomial" } else{ d <- "gaussian" } cat(paste("Distribution not specified, assuming", d, "...\n")) list(name=d) } gbm/MD50000644000176000001440000001425413064146243011370 0ustar ripleyusers108bdba2eb6f2ba6ce890f47224ef68f *CHANGES cf8eedb04e0b7de4ba83cabfe278a328 *DESCRIPTION c2cae135a9c0d4ae15e14e89166ba841 *LICENSE dcb19d54815086722ad172c662cb7d03 *NAMESPACE 6a1293bc6f87d439da835b1b837f9c52 *R/basehaz.gbm.R cc5e4cd5f5d5e23382bae904e9ada152 *R/calibrate.plot.R 5615ac799ce14603a692a2c29be9648f *R/checks.R cf5a5bce0653ae59317ddac8bfe8d389 *R/gbm.R 428c0d3515d5fcbbdd992e10f5d22793 *R/gbm.fit.R 1de9823ae906c064f61a39bd1e0241d3 *R/gbm.loss.R ab8e510ccde4446a7c93ff384ba3217c *R/gbm.more.R 5a79d41470d1f8ae3b8c278bc5e12389 *R/gbm.perf.R 0fdb6a883897939376827795e4ee5230 *R/gbmCluster.R f4651f14ae6acdfa96319bb257f9d0e1 *R/gbmCrossVal.R 7201fac67c6152443cf2a2c3b5989116 *R/gbmDoFold.R f5cc3af1a8eb7ddbf962038e88d27953 *R/getCVgroup.R efd18f017f7a73397141bf4239c922ce *R/getStratify.R 696197960954e0845b8998911987cab2 *R/guessDist.R be47e36ef092244404831df5227f6d65 *R/interact.gbm.R f8c4c5e164b772b3bfc152b8e5659e2e *R/ir.measures.R bbfe015167ca3c75ecd155f6b090f661 *R/permutation.test.gbm.R 51c2749906af39dc17eb1af54b4d861d *R/plot.gbm.R b9c2bb5000212628b390b81dfdd895c0 *R/predict.gbm.R 7e3daea77a7b6ffa18e9f81cf0e0b152 *R/pretty.gbm.tree.R 13ac361d8e3f54893f7de0b66351eee4 *R/print.gbm.R 36d2345c029a4e8384703c92d46f9b2e *R/reconstructGBMdata.R 792e2a5c7cdfeeee3e29c4e418f8af35 *R/relative.influence.R e8cf40a7c7efcd820e908a43252cfc2b *R/shrink.gbm.R eefc2a06d746e77ac2ba101d240640b8 *R/shrink.gbm.pred.R 5b47e86c97e9b464bd64e7ea647c65ae *R/test.gbm.R 4e38ebb4d3578e523b7d94fc9ece3d65 *demo/00Index e3bd8606063f15ded6ab3261c13d22af *demo/OOB-reps.R 354344b4f6e8a232508ef872ced5efa3 *demo/bernoulli.R f7599f6ddc6852ba0721651a46601b06 *demo/coxph.R bb1c84d68320171ac205bb33114d49e1 *demo/gaussian.R 31906c0a7bce9676949413f0fbff2c6c *demo/multinomial.R af763746809ed98e48e065f77942cb05 *demo/pairwise.R dbff7ebcc6a18e27c1b423fd5db70ae3 *demo/printExamples.R 79316127956b8f5291f5021f1e7c89ef *demo/robustReg.R 5e674508b7fde23e247a6e1a6c6b6ec6 *inst/doc/gbm.Sweave e73636a53327b5e049e5764b0620d03e *inst/doc/gbm.pdf b63bc1c2450ad4bca8db60e03b932a53 *inst/doc/gbm.tex 64dbd4ec219c6e855b87bc4ddeba111e *inst/doc/index.html dc706f07b81a76bf9aab2edf4641e86f *inst/doc/oobperf2.eps 7ba661d197d25537a69fc34d737b4d29 *inst/doc/oobperf2.pdf 9d73da9632fed38c327ffbd1b072347b *inst/doc/shrinkage-v-iterations.eps 3fda19791155842b0e48565781441aa2 *inst/doc/shrinkage-v-iterations.pdf 
4d55dd49b93485a78ecb50caafd19b4e *inst/doc/shrinkageplot.R 90fd593dd07098b5600fb650e86733ff *inst/doc/srcltx.sty ce7a173a73fb952a1bf40cb65e3b99f2 *man/basehaz.gbm.Rd 7fca3316fe15ef40546c3db911d67086 *man/calibrate.plot.Rd 99fab30dc167a5c90a1d1424e71a25f4 *man/gbm-internal.Rd dbbaa87e0b50024671667d8d38008e64 *man/gbm-package.Rd eac981fe86aac2cf2b76f2bcee97289f *man/gbm.Rd 089cf48c905c3429ed63f69a0cd982b5 *man/gbm.object.Rd 3ed5b048c81d016868ca2799e4504419 *man/gbm.perf.Rd 7359f0a3b1f2d27cf29e497745c6ba59 *man/gbm.roc.area.Rd 9e8eb660baefa82bc008cbf7e12babf8 *man/gbmCrossVal.Rd 8fca4f44be806cb17eb37affe8334618 *man/interact.gbm.Rd a8728abc1dc77b599c2aa7d1df6f982e *man/plot.gbm.Rd 5896d84873dd1ed5d22005b5b37b17b6 *man/predict.gbm.Rd 1656ffd7646d41236545e0399a70afdd *man/pretty.gbm.tree.Rd 894215a9e1e715f39a6cb79a6fe81baf *man/print.gbm.Rd 0da8961be170c9a72df248d6f0fe985e *man/quantile.rug.Rd 9fbb2bddffae7a639d4f702817eeecb3 *man/reconstructGBMdata.Rd e1dea92edf78383b17139d45c318294c *man/relative.influence.Rd b58470798d31cfceceeec40252ce833f *man/shrink.gbm.Rd ef52c476e46b4c64eee269064ea58b64 *man/shrink.gbm.pred.Rd b73e9273873b1178e9a116187105c022 *man/summary.gbm.Rd 3e0b677bccf30388ec0fc96f77f5fb62 *man/validate.Rd 0d32ce72a7b02fc57d602c60b9ba8305 *src/adaboost.cpp 2f5d22dc3043e69628763cbe303e6b5f *src/adaboost.h 6d2bd44a11975c8f023640eb7a9036c3 *src/bac/gaussian.cpp c877a1d31fa93463ed5d3ccd2164aa80 *src/bernoulli.cpp 323f73ab809cff64ad5b4f336157f295 *src/bernoulli.h 088062cab2532d24fa3a9fc5affcf69a *src/buildinfo.h e15f767c646f66e54eb5bb20ccd7cebd *src/coxph.cpp e110cbd0b715934c4e0257cf20e9c1da *src/coxph.h 3616890b5d7af2b3edd52dc5f29544b0 *src/dataset.cpp d30f46362b1915f76e5a328ce95c7136 *src/dataset.h b5824ccf353076bf59018429ae3ac6ac *src/distribution.cpp 91d88e455827695f63bf23df5dfb3108 *src/distribution.h 6d2bd44a11975c8f023640eb7a9036c3 *src/gaussian.cpp 6c2bf2616a3b4491aaaf501346246d3f *src/gaussian.h 1d8d4e59887769602b1d3c8dc3d5f94f *src/gbm.cpp 0f49e8549558916322ec80e29b591a73 *src/gbm.h c0c572eb464dae70700ffe8fdc3f6b9f *src/gbm_engine.cpp b3f1f49fa614ac6cfd52b28191bfdb70 *src/gbm_engine.h f1da15864dab021cdac1617ffba4ff0f *src/gbmentry.cpp 1fba83f37e9f092d8b005e0c8f32a97b *src/huberized.cpp 141e5b762944c14a0b6294e15046296f *src/huberized.h cd2cedbf213ddbc773ea20fe354a93ae *src/init.c 10dcf061e2807ca52f811ec6650f33ad *src/laplace.cpp 53b4d97c482517fbbc97162da1adf891 *src/laplace.h e7958b4630de29d3848d057d2aebc6e2 *src/locationm.cpp 932f3d98f158ebf6ae11ed47e873a7f3 *src/locationm.h 39094967ceaabf7c744bc93d0b86d22f *src/matrix.h 7242e54abea29c46990c4aabba7a65b6 *src/multinomial.cpp 8798fe266a8bad59ac9b3e7019cebbe8 *src/multinomial.h 75737afcbdd3162c62fcdd82b027e1d2 *src/node.cpp 3f7d35689f88a25a8f536d31c4ce172b *src/node.h 49da51b394dccb0063fa7b5e4ed662d6 *src/node_categorical.cpp 98afbdcf5bb70211102e58ed262fcec1 *src/node_categorical.h 74913ea93e6707eb49e52ac24047ae07 *src/node_continuous.cpp f09bd89f861430f58cb80ccf0de77c6a *src/node_continuous.h af2b9dd107d657344891521829c52243 *src/node_factory.cpp 3b80b8101a773a42a06eb41b5c6b01c9 *src/node_factory.h 56dc9a7a6309294654e641c14a32023d *src/node_nonterminal.cpp 062cbcf913ad61d33048c36ab0b76735 *src/node_nonterminal.h a99c0738f82cb857c87b45a65d4e8f25 *src/node_search.cpp 76b812a554f8ce9e7ea64c6f3c7631ee *src/node_search.h c6943942255ce8138259b6b47caa0c08 *src/node_terminal.cpp 084bcc63d1b33ca200460b88ef36b8f6 *src/node_terminal.h b763976a9c68d9e975417a84b7e2b3c4 *src/pairwise.cpp 8dc9c440afcb8d96f881c6d56ecae4d6 *src/pairwise.h 
756422dc1f3f394260fa4d77ec42d1ed *src/poisson.cpp 0c901877981c1df8c4d82f6dd99c9231 *src/poisson.h 64e10460138c1b67923020b58cf1a599 *src/quantile.cpp 491d792d90d047d5a8c192253b632252 *src/quantile.h 519b30584e7e752480750e86027aea7e *src/tdist.cpp 9ab15eb81fc9a18ee7d14a76f7aefd2a *src/tdist.h 276e36bf158250eb458a1cdabcf975b5 *src/tree.cpp 6b2f1cd60e5d67638e110e1ac9552b27 *src/tree.h gbm/DESCRIPTION0000644000176000001440000000217213064146243012562 0ustar ripleyusersPackage: gbm Version: 2.1.3 Date: 2017-03-21 Title: Generalized Boosted Regression Models Author: Greg Ridgeway with contributions from others Maintainer: ORPHANED Depends: R (>= 2.9.0), survival, lattice, splines, parallel Suggests: RUnit Description: An implementation of extensions to Freund and Schapire's AdaBoost algorithm and Friedman's gradient boosting machine. Includes regression methods for least squares, absolute loss, t-distribution loss, quantile regression, logistic, multinomial logistic, Poisson, Cox proportional hazards partial likelihood, AdaBoost exponential loss, Huberized hinge loss, and Learning to Rank measures (LambdaMart). License: GPL (>= 2) | file LICENSE URL: http://code.google.com/p/gradientboostedmodels/ Packaged: 2017-03-21 06:44:01 UTC; ripley NeedsCompilation: yes Repository: CRAN Date/Publication: 2017-03-21 06:48:03 UTC X-CRAN-Original-Maintainer: Harry Southworth X-CRAN-Comment: Orphaned on 2017-03-21 as long-standing errors were not corrected. NMU by CRAN team. gbm/man/0000755000176000001440000000000013064144760011627 5ustar ripleyusersgbm/man/summary.gbm.Rd0000644000176000001440000000531513064144760014363 0ustar ripleyusers\name{summary.gbm} \alias{summary.gbm} \title{ Summary of a gbm object } \description{ Computes the relative influence of each variable in the gbm object. } \usage{ \method{summary}{gbm}(object, cBars=length(object$var.names), n.trees=object$n.trees, plotit=TRUE, order=TRUE, method=relative.influence, normalize=TRUE, ...) } \arguments{ \item{object}{a \code{gbm} object created from an initial call to \code{\link{gbm}}.} \item{cBars}{ the number of bars to plot. If \code{order=TRUE} the only the variables with the \code{cBars} largest relative influence will appear in the barplot. If \code{order=FALSE} then the first \code{cBars} variables will appear in the plot. In either case, the function will return the relative influence of all of the variables.} \item{n.trees}{ the number of trees used to generate the plot. Only the first \code{n.trees} trees will be used.} \item{plotit}{ an indicator as to whether the plot is generated. } \item{order}{ an indicator as to whether the plotted and/or returned relative influences are sorted. } \item{method}{ The function used to compute the relative influence. \code{\link{relative.influence}} is the default and is the same as that described in Friedman (2001). The other current (and experimental) choice is \code{\link{permutation.test.gbm}}. This method randomly permutes each predictor variable at a time and computes the associated reduction in predictive performance. This is similar to the variable importance measures Breiman uses for random forests, but \code{gbm} currently computes using the entire training dataset (not the out-of-bag observations).} \item{normalize}{ if \code{FALSE} then \code{summary.gbm} returns the unnormalized influence. } \item{...}{ other arguments passed to the plot function. } } \details{ For \code{distribution="gaussian"} this returns exactly the reduction of squared error attributable to each variable. 
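% A minimal sketch (hypothetical fitted model `gbm1`):
%   ri <- summary(gbm1, n.trees = gbm1$n.trees, plotit = FALSE)
% `ri` is a data frame of variable names and relative influences.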
For other loss functions this returns the reduction attributable to each variable in the sum of squared error in predicting the gradient on each iteration. It describes the relative influence of each variable in reducing the loss function. See the references below for exact details on the computation. } \value{ Returns a data frame where the first component is the variable name and the second is the computed relative influence, normalized to sum to 100. } \references{ J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics 29(5):1189-1232. L. Breiman (2001). \href{https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf}{Random Forests}. } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}} } \keyword{ hplot } gbm/man/predict.gbm.Rd0000644000176000001440000000453012102666411014310 0ustar ripleyusers\name{predict.gbm} \alias{predict.gbm} \title{ Predict method for GBM Model Fits } \description{ Predicted values based on a generalized boosted model object } \usage{ \method{predict}{gbm}(object, newdata, n.trees, type="link", single.tree=FALSE, ...) } \arguments{ \item{object}{ Object of class inheriting from (\code{\link{gbm.object}}) } \item{newdata}{ Data frame of observations for which to make predictions } \item{n.trees}{ Number of trees used in the prediction. \code{n.trees} may be a vector in which case predictions are returned for each iteration specified} \item{type}{ The scale on which gbm makes the predictions } \item{single.tree}{If \code{single.tree=TRUE} then \code{predict.gbm} returns only the predictions from tree(s) \code{n.trees}} \item{\dots}{ further arguments passed to or from other methods } } \details{ \code{predict.gbm} produces predicted values for each observation in \code{newdata} using the first \code{n.trees} iterations of the boosting sequence. If \code{n.trees} is a vector then the result is a matrix with each column representing the predictions from gbm models with \code{n.trees[1]} iterations, \code{n.trees[2]} iterations, and so on. The predictions from \code{gbm} do not include the offset term. The user may add the value of the offset to the predicted value if desired. If \code{object} was fit using \code{\link{gbm.fit}} there will be no \code{Terms} component. Therefore, the user has greater responsibility to make sure that \code{newdata} is of the same format (order and number of variables) as the one originally used to fit the model. } \value{ Returns a vector of predictions. By default the predictions are on the scale of f(x). For example, for the Bernoulli loss the returned value is on the log odds scale, poisson loss on the log scale, and coxph is on the log hazard scale. If \code{type="response"} then \code{gbm} converts back to the same scale as the outcome. Currently the only effect this will have is returning probabilities for bernoulli and expected counts for poisson. For the other distributions "response" and "link" return the same. } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}} } \keyword{ models } \keyword{ regression } gbm/man/shrink.gbm.pred.Rd0000644000176000001440000000174512102666411015112 0ustar ripleyusers\name{shrink.gbm.pred} \alias{shrink.gbm.pred} %- Also NEED an '\alias' for EACH other topic documented here. \title{ Predictions from a shrunken GBM } \description{ Makes predictions from a shrunken GBM model.
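% Hypothetical call for this experimental function, assuming a fitted
% model `fit` and new data `df`:
%   p <- shrink.gbm.pred(fit, newdata = df, n.trees = 100,
%                        lambda = rep(0.5, length(fit$var.names)))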
} \usage{ shrink.gbm.pred(object, newdata, n.trees, lambda = rep(1, length(object$var.names)), ...) } \arguments{ \item{object}{ a \code{\link{gbm.object}} } \item{newdata}{ dataset for predictions } \item{n.trees}{ the number of trees to use } \item{lambda}{ a vector with length equal to the number of variables containing the shrinkage parameter for each variable } \item{\dots}{ other parameters (ignored) } } \value{ A vector with length equal to the number of observations in newdata containing the predictions } \author{ Greg Ridgeway \email{gregridgeway@gmail.com} } \section{Warning}{This function is experimental} \seealso{ \code{\link{shrink.gbm}}, \code{\link{gbm}} } \keyword{ methods } gbm/man/shrink.gbm.Rd0000644000176000001440000000277712102666411014167 0ustar ripleyusers\name{shrink.gbm} \alias{shrink.gbm} \title{ L1 shrinkage of the predictor variables in a GBM } \description{ Performs recursive shrinkage in each of the trees in a GBM fit using different shrinkage parameters for each variable. } \usage{ shrink.gbm(object, n.trees, lambda = rep(10, length(object$var.names)), ...) } \arguments{ \item{object}{ A \code{\link{gbm.object}} } \item{n.trees}{ the number of trees to use } \item{lambda}{ a vector with length equal to the number of variables containing the shrinkage parameter for each variable } \item{\dots}{ other parameters (ignored) } } \details{ This function is currently experimental. Used in conjunction with a gradient ascent search for inclusion of variables. } \value{ \item{predF}{Predicted values from the shrunken tree} \item{objective}{The value of the loss function associated with the predicted values} \item{gradient}{A vector with length equal to the number of variables containing the derivative of the objective function with respect to beta, the logit transform of the shrinkage parameter for each variable} } \references{ Hastie, T. J., and Pregibon, D. \href{http://www-stat.stanford.edu/~hastie/Papers/shrinktree.ps}{Shrinking Trees}. AT&T Bell Laboratories Technical Report (March 1990).} \author{ Greg Ridgeway \email{gregridgeway@gmail.com} } \section{Warning}{This function is experimental.} \seealso{ \code{\link{shrink.gbm.pred}}, \code{\link{gbm}} } \keyword{ methods}% at least one, from doc/KEYWORDS gbm/man/plot.gbm.Rd0000644000176000001440000000512012102666411013630 0ustar ripleyusers\name{plot.gbm} \alias{plot.gbm} \title{ Marginal plots of fitted gbm objects } \description{ Plots the marginal effect of the selected variables by "integrating" out the other variables. } \usage{ \method{plot}{gbm}(x, i.var = 1, n.trees = x$n.trees, continuous.resolution = 100, return.grid = FALSE, type = "link", ...) } \arguments{ \item{x}{ a \code{\link{gbm.object}} fitted using a call to \code{\link{gbm}}} \item{i.var}{a vector of indices or the names of the variables to plot. If using indices, the variables are indexed in the same order that they appear in the initial \code{gbm} formula. If \code{length(i.var)} is between 1 and 3 then \code{plot.gbm} produces the plots. Otherwise, \code{plot.gbm} returns only the grid of evaluation points and their average predictions} \item{n.trees}{ the number of trees used to generate the plot. Only the first \code{n.trees} trees will be used} \item{continuous.resolution}{ The number of equally space points at which to evaluate continuous predictors } \item{return.grid}{ if \code{TRUE} then \code{plot.gbm} produces no graphics and only returns the grid of evaluation points and their average predictions. 
This is useful for customizing the graphics for special variable types or for dimensions greater than 3 } \item{type}{ the type of prediction to plot on the vertical axis. See \code{predict.gbm}} \item{\dots}{ other arguments passed to the plot function } } \details{ \code{plot.gbm} produces low dimensional projections of the \code{\link{gbm.object}} by integrating out the variables not included in the \code{i.var} argument. The function selects a grid of points and uses the weighted tree traversal method described in Friedman (2001) to do the integration. Based on the variable types included in the projection, \code{plot.gbm} selects an appropriate display choosing amongst line plots, contour plots, and \code{\link[lattice]{lattice}} plots. If the default graphics are not sufficient the user may set \code{return.grid=TRUE}, store the result of the function, and develop another graphic display more appropriate to the particular example. } \value{ Nothing unless \code{return.grid} is true then \code{plot.gbm} produces no graphics and only returns the grid of evaluation points and their average predictions. } \references{ J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics 29(4). } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}}, \code{\link[graphics]{plot}} } \keyword{ hplot } gbm/man/print.gbm.Rd0000644000176000001440000000422412134211007014002 0ustar ripleyusers\name{print.gbm} \alias{print.gbm} \alias{show.gbm} \title{ Print model summary } \description{ Display basic information about a \code{gbm} object. } \usage{ \method{print}{gbm}(x, ...) \method{show}{gbm}(x, ...) } \arguments{ \item{x}{ an object of class \code{gbm}. } \item{\dots}{ arguments passed to \code{print.default}. } } \details{ Prints some information about the model object. In particular, this method prints the call to \code{gbm()}, the type of loss function that was used, and the total number of iterations. If cross-validation was performed, the 'best' number of trees as estimated by cross-validation error is displayed. If a test set was used, the 'best' number of trees as estimated by the test set error is displayed. The number of available predictors, and the number of those having non-zero influence on predictions is given (which might be interesting in data mining applications). If multinomial, bernoulli or adaboost was used, the confusion matrix and prediction accuracy are printed (objects being allocated to the class with highest probability for multinomial and bernoulli). These classifications are performed on the entire training data using the model with the 'best' number of trees as described above, or the maximum number of trees if the 'best' cannot be computed. If the 'distribution' was specified as gaussian, laplace, quantile or t-distribution, a summary of the residuals is displayed. The residuals are for the training data with the model at the 'best' number of trees, as described above, or the maximum number of trees if the 'best' cannot be computed. 
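% A minimal sketch with hypothetical data `df` and 0-1 response `y`:
%   mod <- gbm(y ~ ., data = df, distribution = "bernoulli", n.trees = 500)
%   mod   # equivalent to print(mod): call, loss, 'best' iteration, etc.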
} \author{ Harry Southworth, Daniel Edwards } \seealso{ \code{\link{gbm}} } \examples{ data(iris) iris.mod <- gbm(Species ~ ., distribution="multinomial", data=iris, n.trees=2000, shrinkage=0.01, cv.folds=5, verbose=FALSE, n.cores=1) iris.mod #data(lung) #lung.mod <- gbm(Surv(time, status) ~ ., distribution="coxph", data=lung, # n.trees=2000, shrinkage=0.01, cv.folds=5,verbose =FALSE) #lung.mod } \keyword{models} \keyword{nonlinear} \keyword{survival} \keyword{nonparametric} gbm/man/gbm.object.Rd0000644000176000001440000000450312142655062014130 0ustar ripleyusers\name{gbm.object} \alias{gbm.object} \title{Generalized Boosted Regression Model Object} \description{These are objects representing fitted \code{gbm}s.} \section{Structure}{The following components must be included in a legitimate \code{gbm} object.} \value{ \item{initF}{the "intercept" term, the initial predicted value to which trees make adjustments} \item{fit}{a vector containing the fitted values on the scale of regression function (e.g. log-odds scale for bernoulli, log scale for poisson)} \item{train.error}{a vector of length equal to the number of fitted trees containing the value of the loss function for each boosting iteration evaluated on the training data} \item{valid.error}{a vector of length equal to the number of fitted trees containing the value of the loss function for each boosting iteration evaluated on the validation data} \item{cv.error}{if \code{cv.folds}<2 this component is NULL. Otherwise, this component is a vector of length equal to the number of fitted trees containing a cross-validated estimate of the loss function for each boosting iteration} \item{oobag.improve}{a vector of length equal to the number of fitted trees containing an out-of-bag estimate of the marginal reduction in the expected value of the loss function. The out-of-bag estimate uses only the training data and is useful for estimating the optimal number of boosting iterations. See \code{\link{gbm.perf}}} \item{trees}{a list containing the tree structures. The components are best viewed using \code{\link{pretty.gbm.tree}}} \item{c.splits}{a list of all the categorical splits in the collection of trees. If the \code{trees[[i]]} component of a \code{gbm} object describes a categorical split then the splitting value will refer to a component of \code{c.splits}. That component of \code{c.splits} will be a vector of length equal to the number of levels in the categorical split variable. -1 indicates left, +1 indicates right, and 0 indicates that the level was not present in the training data} \item{cv.fitted}{If cross-validation was performed, the cross-validation predicted values on the scale of the linear predictor. That is, the fitted values from the ith CV-fold, for the model having been trained on the data in all other folds.} } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}} } \keyword{methods} gbm/man/validate.Rd0000644000176000001440000000215112102666411013700 0ustar ripleyusers\name{validate.gbm} \alias{validate.gbm} \alias{test.gbm} \alias{test.relative.influence} %- Also NEED an '\alias' for EACH other topic documented here. \title{Test the \code{gbm} package.} \description{ Run tests on \code{gbm} functions to perform logical checks and reproducibility. } \usage{ validate.gbm() } %- maybe also 'usage' for other objects documented here. \details{ The function uses functionality in the \code{RUnit} package. 
A fairly small validation suite is executed that checks to see that relative influence identifies sensible variables from simulated data, and that predictions from GBMs with Gaussian, Cox or binomial distributions are sensible. } \value{ An object of class \code{RUnitTestData}. See the help for \code{RUnit} for details. } \author{ Harry Southworth } \note{The test suite is not comprehensive.} %% ~Make other sections like Warning with \section{Warning }{....} ~ \seealso{ \code{\link{gbm}} } \examples{ # Uncomment the following lines to run - commented out to make CRAN happy #library(RUnit) #val <- validate.gbm() #printHTMLProtocol(val, "gbmReport.html") } \keyword{models} gbm/man/gbmCrossVal.Rd0000644000176000001440000000557413064144644014352 0ustar ripleyusers\name{gbmCrossVal} \alias{gbmCrossVal} \alias{gbmCrossValModelBuild} \alias{gbmDoFold} \alias{gbmCrossValErr} \alias{gbmCrossValPredictions} \title{Cross-validate a gbm} \description{Functions for cross-validating gbm. These functions are used internally and are not intended for end-user direct usage.} \usage{ gbmCrossVal(cv.folds, nTrain, n.cores, class.stratify.cv, data, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group) gbmCrossValModelBuild(cv.folds, cv.group, n.cores, i.train, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, var.names, response.name, group) gbmDoFold(X, i.train, x, y, offset, distribution, w, var.monotone, n.trees, interaction.depth, n.minobsinnode, shrinkage, bag.fraction, cv.group, var.names, response.name, group, s) gbmCrossValErr(cv.models, cv.folds, cv.group, nTrain, n.trees) gbmCrossValPredictions(cv.models, cv.folds, cv.group, best.iter.cv, distribution, data, y) } \arguments{ \item{cv.folds}{The number of cross-validation folds.} \item{nTrain}{The number of training samples.} \item{n.cores}{The number of cores to use.} \item{class.stratify.cv}{Whether or not stratified cross-validation samples are used.} \item{data}{The data.} \item{x}{The model matrix.} \item{y}{The response variable.} \item{offset}{The offset.} \item{distribution}{The type of loss function. See \code{\link{gbm}}.} \item{w}{Observation weights.} \item{var.monotone}{See \code{\link{gbm}}.} \item{n.trees}{The number of trees to fit.} \item{interaction.depth}{The degree of allowed interactions. See \code{\link{gbm}}.} \item{n.minobsinnode}{See \code{\link{gbm}}.} \item{shrinkage}{See \code{\link{gbm}}.} \item{bag.fraction}{See \code{\link{gbm}}.} \item{var.names}{See \code{\link{gbm}}.} \item{response.name}{See \code{\link{gbm}}.} \item{group}{Used when \code{distribution = "pairwise"}. See \code{\link{gbm}}.} \item{i.train}{Items in the training set.} \item{cv.models}{A list containing the models for each fold.} \item{cv.group}{A vector indicating the cross-validation fold for each member of the training set.} \item{best.iter.cv}{The iteration with lowest cross-validation error.} \item{X}{Index (cross-validation fold) on which to subset.} \item{s}{Random seed.} } % Close arguments \details{ These functions are not intended for end-user direct usage, but are used internally by \code{gbm}.} \value{A list containing the cross-validation error and predictions.} \references{ J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics 29(5):1189-1232. L. Breiman (2001).
\href{https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf}{Random Forests}. } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}} } \keyword{ models } gbm/man/pretty.gbm.tree.Rd0000644000176000001440000000317512134211007015139 0ustar ripleyusers\name{pretty.gbm.tree} \alias{pretty.gbm.tree} \title{ Print gbm tree components } \description{ \code{gbm} stores the collection of trees used to construct the model in a compact matrix structure. This function extracts the information from a single tree and displays it in a slightly more readable form. This function is mostly for debugging purposes and to satisfy some users' curiosity. } \usage{ pretty.gbm.tree(object, i.tree = 1) } \arguments{ \item{object}{ a \code{\link{gbm.object}} initially fit using \code{\link{gbm}}} \item{i.tree}{ the index of the tree component to extract from \code{object} and display } } \value{ \code{pretty.gbm.tree} returns a data frame. Each row corresponds to a node in the tree. Columns indicate \item{SplitVar}{index of which variable is used to split. -1 indicates a terminal node.} \item{SplitCodePred}{if the split variable is continuous then this component is the split point. If the split variable is categorical then this component contains the index of \code{object$c.split} that describes the categorical split. If the node is a terminal node then this is the prediction.} \item{LeftNode}{the index of the row corresponding to the left node.} \item{RightNode}{the index of the row corresponding to the right node.} \item{ErrorReduction}{the reduction in the loss function as a result of splitting this node.} \item{Weight}{the total weight of observations in the node. If weights are all equal to 1 then this is the number of observations in the node.} } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}} } \keyword{ print } gbm/man/relative.influence.Rd0000644000176000001440000000412413064144713015677 0ustar ripleyusers\name{relative.influence} \alias{relative.influence} \alias{permutation.test.gbm} \alias{gbm.loss} \title{ Methods for estimating relative influence } \description{ Helper functions for computing the relative influence of each variable in the gbm object. } \usage{ relative.influence(object, n.trees, scale., sort.) permutation.test.gbm(object, n.trees) gbm.loss(y,f,w,offset,dist,baseline, group, max.rank) } \arguments{ \item{object}{a \code{gbm} object created from an initial call to \code{\link{gbm}}.} \item{n.trees}{ the number of trees to use for computations. If not provided, the function will guess: if a test set was used in fitting, the number of trees resulting in lowest test set error will be used; otherwise, if cross-validation was performed, the number of trees resulting in lowest cross-validation error will be used; otherwise, all trees will be used.} \item{scale.}{ whether or not the result should be scaled. Defaults to \code{FALSE}.} \item{sort.}{ whether or not the results should be (reverse) sorted. Defaults to \code{FALSE}.} \item{y,f,w,offset,dist,baseline}{For \code{gbm.loss}: These components are the outcome, predicted value, observation weight, offset, distribution, and comparison loss function, respectively.} \item{group, max.rank}{Used internally when \code{distribution = 'pairwise'}.} } \details{ This is not intended for end-user use. These functions offer the different methods for computing the relative influence in \code{\link{summary.gbm}}.
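% Direct-call sketch (normally reached via summary.gbm; `gbm1` is a
% hypothetical fitted model):
%   relative.influence(gbm1, n.trees = 100, scale. = TRUE, sort. = TRUE)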
\code{gbm.loss} is a helper function for \code{permutation.test.gbm}. } \value{ By default, returns an unprocessed vector of estimated relative influences. If the \code{scale.} and \code{sort.} arguments are used, returns a processed version of the same. } \references{ J.H. Friedman (2001). "Greedy Function Approximation: A Gradient Boosting Machine," Annals of Statistics 29(5):1189-1232. L. Breiman (2001). \href{https://www.stat.berkeley.edu/~breiman/randomforest2001.pdf}{Random Forests}. } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{summary.gbm}} } \keyword{ hplot } gbm/man/gbm.perf.Rd0000644000176000001440000000353112134211007013602 0ustar ripleyusers\name{gbm.perf} \alias{gbm.perf} \title{GBM performance} \description{ Estimates the optimal number of boosting iterations for a \code{gbm} object and optionally plots various performance measures } \usage{ gbm.perf(object, plot.it = TRUE, oobag.curve = FALSE, overlay = TRUE, method) } \arguments{ \item{object}{a \code{\link{gbm.object}} created from an initial call to \code{\link{gbm}}.} \item{plot.it}{an indicator of whether or not to plot the performance measures. Setting \code{plot.it=TRUE} creates two plots. The first plot plots \code{object$train.error} (in black) and \code{object$valid.error} (in red) versus the iteration number. The scale of the error measurement, shown on the left vertical axis, depends on the \code{distribution} argument used in the initial call to \code{\link{gbm}}.} \item{oobag.curve}{indicates whether to plot the out-of-bag performance measures in a second plot.} \item{overlay}{if TRUE and oobag.curve=TRUE then a right y-axis is added to the training and test error plot and the estimated cumulative improvement in the loss function is plotted versus the iteration number.} \item{method}{indicate the method used to estimate the optimal number of boosting iterations. \code{method="OOB"} computes the out-of-bag estimate and \code{method="test"} uses the test (or validation) dataset to compute an out-of-sample estimate. \code{method="cv"} extracts the optimal number of iterations using cross-validation if \code{gbm} was called with \code{cv.folds}>1} } \value{ \code{gbm.perf} returns the estimated optimal number of iterations. The method of computation depends on the \code{method} argument.} \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{\code{\link{gbm}}, \code{\link{gbm.object}}} \keyword{nonlinear} \keyword{survival} \keyword{nonparametric} \keyword{tree} gbm/man/calibrate.plot.Rd0000644000176000001440000000563512102666411015024 0ustar ripleyusers\name{calibrate.plot} \alias{calibrate.plot} \title{Calibration plot} \description{ An experimental diagnostic tool that plots the fitted values versus the actual average values. Currently developed for only \code{distribution="bernoulli"}. } \usage{ calibrate.plot(y,p, distribution="bernoulli", replace=TRUE, line.par=list(col="black"), shade.col="lightyellow", shade.density=NULL, rug.par=list(side=1), xlab="Predicted value", ylab="Observed average", xlim=NULL,ylim=NULL, knots=NULL,df=6, ...) } \arguments{ \item{y}{ the outcome 0-1 variable } \item{p}{ the predictions estimating E(y|x) } \item{distribution}{the loss function used in creating \code{p}. \code{bernoulli} and \code{poisson} are currently the only special options. All others default to squared error assuming \code{gaussian}} \item{replace}{ determines whether this plot will replace or overlay the current plot. 
\code{replace=FALSE} is useful for comparing the calibration of several methods} \item{line.par}{ graphics parameters for the line } \item{shade.col}{ color for shading the 2 SE region. \code{shade.col=NA} implies no 2 SE region} \item{shade.density}{ the \code{density} parameter for \code{\link{polygon}}} \item{rug.par}{graphics parameters passed to \code{\link{rug}}} \item{xlab}{x-axis label corresponding to the predicted values} \item{ylab}{y-axis label corresponding to the observed average} \item{xlim,ylim}{x and y-axis limits. If not specified te function will select limits} \item{knots,df}{these parameters are passed directly to \code{\link[splines]{ns}} for constructing a natural spline smoother for the calibration curve} \item{...}{ other graphics parameters passed on to the plot function } } \details{ Uses natural splines to estimate E(y|p). Well-calibrated predictions imply that E(y|p) = p. The plot also includes a pointwise 95% confidence band. } \value{ \code{calibrate.plot} returns no values. } \references{ J.F. Yates (1982). "External correspondence: decomposition of the mean probability score," Organisational Behaviour and Human Performance 30:132-156. D.J. Spiegelhalter (1986). "Probabilistic Prediction in Patient Management and Clinical Trials," Statistics in Medicine 5:421-433. } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \examples{ # Don't want R CMD check to think there is a dependency on rpart # so comment out the example #library(rpart) #data(kyphosis) #y <- as.numeric(kyphosis$Kyphosis)-1 #x <- kyphosis$Age #glm1 <- glm(y~poly(x,2),family=binomial) #p <- predict(glm1,type="response") #calibrate.plot(y, p, xlim=c(0,0.6), ylim=c(0,0.6)) } \keyword{ hplot } gbm/man/quantile.rug.Rd0000644000176000001440000000117512134211007014522 0ustar ripleyusers\name{quantile.rug} \alias{quantile.rug} \title{Quantile rug plot} \description{Marks the quantiles on the axes of the current plot.} \usage{ quantile.rug(x,prob=(0:10)/10,...) } \arguments{ \item{x}{a numeric vector.} \item{prob}{the quantiles of x to mark on the x-axis.} \item{...}{additional graphics parameters currently ignored.} } \value{No return values} \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link[graphics]{plot}}, \code{\link[stats]{quantile}}, \code{\link[base]{jitter}}, \code{\link[graphics]{rug}}. } \examples{ x <- rnorm(100) y <- rnorm(100) plot(x,y) quantile.rug(x) } \keyword{aplot} gbm/man/reconstructGBMdata.Rd0000755000176000001440000000112212134211007015632 0ustar ripleyusers\name{reconstructGBMdata} \Rdversion{1.1} \alias{reconstructGBMdata} \title{Reconstruct a GBM's Source Data} \description{ Helper function to reconstitute the data for plots and summaries. This function is not intended for the user to call directly. } \usage{ reconstructGBMdata(x) } \arguments{ \item{x}{ a \code{\link{gbm.object}} initially fit using \code{\link{gbm}} } } \value{ Returns a data used to fit the gbm in a format that can subsequently be used for plots and summaries } \author{ Harry Southworth } \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}} } \keyword{ manip } gbm/man/gbm.roc.area.Rd0000644000176000001440000000354212102666411014352 0ustar ripleyusers\name{gbm.roc.area} \alias{gbm.roc.area} \alias{gbm.conc} \alias{ir.measure.conc} \alias{ir.measure.auc} \alias{ir.measure.mrr} \alias{ir.measure.map} \alias{ir.measure.ndcg} \alias{perf.pairwise} \title{ Compute Information Retrieval measures. 
} \description{ Functions to compute Information Retrieval measures for pairwise loss for a single group. Each function returns the respective metric, or a negative value if it is undefined for the given group. } \usage{ gbm.roc.area(obs, pred) ir.measure.conc(y.f, max.rank) ir.measure.auc(y.f, max.rank) ir.measure.mrr(y.f, max.rank) ir.measure.map(y.f, max.rank) ir.measure.ndcg(y.f, max.rank) perf.pairwise(y, f, group, metric="ndcg", w=NULL, max.rank=0) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{obs}{Observed value} \item{pred}{Predicted value} \item{metric}{What type of performance measure to compute.} \item{y, y.f, f, w, group, max.rank}{Used internally.} } \details{ For simplicity, we have no special handling for ties; instead, we break ties randomly. This is slightly inaccurate for individual groups, but should have only a small effect on the overall measure. \code{gbm.conc} computes the concordance index: the fraction of all pairs (i,j) with different values of \code{obs} that are ranked in the correct order. If \code{obs} is binary, this is equivalent to the area under the ROC curve. } \examples{ ##---- Should be DIRECTLY executable !! ---- ##-- ==> Define data, use random, ##-- or do help(data=index) for the standard data sets. } \keyword{ models } gbm/man/interact.gbm.Rd0000644000176000001440000000375612142655062014504 0ustar ripleyusers\name{interact.gbm} \alias{interact.gbm} \title{ Estimate the strength of interaction effects } \description{ Computes Friedman's H-statistic to assess the strength of variable interactions. } \usage{ interact.gbm(x, data, i.var = 1, n.trees = x$n.trees) } \arguments{ \item{x}{ a \code{\link{gbm.object}} fitted using a call to \code{\link{gbm}}} \item{data}{ the dataset used to construct \code{x}. If the original dataset is large, a random subsample may be used to accelerate the computation in \code{interact.gbm}} \item{i.var}{a vector of indices or the names of the variables for which to compute the interaction effect. If using indices, the variables are indexed in the same order that they appear in the initial \code{gbm} formula.} \item{n.trees}{ the number of trees used to generate the plot. Only the first \code{n.trees} trees will be used} } \details{ \code{interact.gbm} computes Friedman's H-statistic to assess the relative strength of interaction effects in non-linear models. H is on the scale of [0-1] with higher values indicating larger interaction effects. To connect to a more familiar measure, if \eqn{x_1} and \eqn{x_2} are uncorrelated covariates with mean 0 and variance 1 and the model is of the form \deqn{y=\beta_0+\beta_1x_1+\beta_2x_2+\beta_3x_1x_2} then \deqn{H=\frac{\beta_3}{\sqrt{\beta_1^2+\beta_2^2+\beta_3^2}}} Note that if the main effects are weak, the estimated H will be unstable. For example, if (in the case of a two-way interaction) neither main effect is in the selected model (relative influence is zero), the result will be 0/0. Also, with weak main effects, rounding errors can result in values of H > 1 which are not possible. } \value{ Returns the value of \eqn{H}. } \references{ J.H. Friedman and B.E. Popescu (2005).
\dQuote{Predictive Learning via Rule Ensembles.} Section 8.1 } \author{Greg Ridgeway \email{gregridgeway@gmail.com}} \seealso{ \code{\link{gbm}}, \code{\link{gbm.object}} } \keyword{ methods } gbm/man/gbm.Rd0000644000176000001440000004123412143232225012656 0ustar ripleyusers\name{gbm} \alias{gbm} \alias{gbm.more} \alias{gbm.fit} \title{Generalized Boosted Regression Modeling} \description{Fits generalized boosted regression models.} \usage{ gbm(formula = formula(data), distribution = "bernoulli", data = list(), weights, var.monotone = NULL, n.trees = 100, interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.001, bag.fraction = 0.5, train.fraction = 1.0, cv.folds=0, keep.data = TRUE, verbose = "CV", class.stratify.cv=NULL, n.cores = NULL) gbm.fit(x, y, offset = NULL, misc = NULL, distribution = "bernoulli", w = NULL, var.monotone = NULL, n.trees = 100, interaction.depth = 1, n.minobsinnode = 10, shrinkage = 0.001, bag.fraction = 0.5, nTrain = NULL, train.fraction = NULL, keep.data = TRUE, verbose = TRUE, var.names = NULL, response.name = "y", group = NULL) gbm.more(object, n.new.trees = 100, data = NULL, weights = NULL, offset = NULL, verbose = NULL) } \arguments{\item{formula}{a symbolic description of the model to be fit. The formula may include an offset term (e.g. y~offset(n)+x). If \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the user's responsibility to resupply the offset to \code{\link{gbm.more}}.} \item{distribution}{either a character string specifying the name of the distribution to use or a list with a component \code{name} specifying the distribution and any additional parameters needed. If not specified, \code{gbm} will try to guess: if the response has only 2 unique values, bernoulli is assumed; otherwise, if the response is a factor, multinomial is assumed; otherwise, if the response has class "Surv", coxph is assumed; otherwise, gaussian is assumed. Currently available options are "gaussian" (squared error), "laplace" (absolute loss), "tdist" (t-distribution loss), "bernoulli" (logistic regression for 0-1 outcomes), "huberized" (huberized hinge loss for 0-1 outcomes), "multinomial" (classification when there are more than 2 classes), "adaboost" (the AdaBoost exponential loss for 0-1 outcomes), "poisson" (count outcomes), "coxph" (right censored observations), "quantile", or "pairwise" (ranking measure using the LambdaMart algorithm). If quantile regression is specified, \code{distribution} must be a list of the form \code{list(name="quantile",alpha=0.25)} where \code{alpha} is the quantile to estimate. The current version's quantile regression method does not handle non-constant weights and will stop. If "tdist" is specified, the default degrees of freedom is 4 and this can be controlled by specifying \code{distribution=list(name="tdist", df=DF)} where \code{DF} is your chosen degrees of freedom. If "pairwise" regression is specified, \code{distribution} must be a list of the form \code{list(name="pairwise",group=...,metric=...,max.rank=...)} (\code{metric} and \code{max.rank} are optional, see below). \code{group} is a character vector with the column names of \code{data} that jointly indicate the group an instance belongs to (typically a query in Information Retrieval applications). For training, only pairs of instances from the same group and with different target labels can be considered. 
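% For example (sketch; "query" stands for a grouping column of `data`):
%   distribution = list(name = "pairwise", group = "query",
%                       metric = "ndcg", max.rank = 10)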
\code{metric} is the IR measure to use, one of \describe{ \item{\code{conc}:}{Fraction of concordant pairs; for binary labels, this is equivalent to the Area under the ROC Curve} \item{\code{mrr}:}{Mean reciprocal rank of the highest-ranked positive instance} \item{\code{map}:}{Mean average precision, a generalization of \code{mrr} to multiple positive instances} \item{\code{ndcg:}}{Normalized discounted cumulative gain. The score is the weighted sum (DCG) of the user-supplied target values, weighted by log(rank+1), and normalized to the maximum achievable value. This is the default if the user did not specify a metric.} } \code{ndcg} and \code{conc} allow arbitrary target values, while binary targets \{0,1\} are expected for \code{map} and \code{mrr}. For \code{ndcg} and \code{mrr}, a cut-off can be chosen using a positive integer parameter \code{max.rank}. If left unspecified, all ranks are taken into account. Note that splitting of instances into training and validation sets follows group boundaries and therefore only approximates the specified \code{train.fraction} ratio (the same applies to cross-validation folds). Internally, queries are randomly shuffled before training, to avoid bias. Weights can be used in conjunction with pairwise metrics, however it is assumed that they are constant for instances from the same group. For details and background on the algorithm, see e.g. Burges (2010). } \item{data}{an optional data frame containing the variables in the model. By default the variables are taken from \code{environment(formula)}, typically the environment from which \code{gbm} is called. If \code{keep.data=TRUE} in the initial call to \code{gbm} then \code{gbm} stores a copy with the object. If \code{keep.data=FALSE} then subsequent calls to \code{\link{gbm.more}} must resupply the same dataset. It becomes the user's responsibility to resupply the same data at this point.} \item{weights}{an optional vector of weights to be used in the fitting process. Must be positive but do not need to be normalized. If \code{keep.data=FALSE} in the initial call to \code{gbm} then it is the user's responsibility to resupply the weights to \code{\link{gbm.more}}.} \item{var.monotone}{an optional vector, the same length as the number of predictors, indicating which variables have a monotone increasing (+1), decreasing (-1), or arbitrary (0) relationship with the outcome.} \item{n.trees}{the total number of trees to fit. This is equivalent to the number of iterations and the number of basis functions in the additive expansion.} \item{cv.folds}{Number of cross-validation folds to perform. If \code{cv.folds}>1 then \code{gbm}, in addition to the usual fit, will perform a cross-validation, calculate an estimate of generalization error returned in \code{cv.error}.} \item{interaction.depth}{The maximum depth of variable interactions. 1 implies an additive model, 2 implies a model with up to 2-way interactions, etc.} \item{n.minobsinnode}{minimum number of observations in the trees terminal nodes. Note that this is the actual number of observations not the total weight.} \item{shrinkage}{a shrinkage parameter applied to each tree in the expansion. Also known as the learning rate or step-size reduction.} \item{bag.fraction}{the fraction of the training set observations randomly selected to propose the next tree in the expansion. This introduces randomnesses into the model fit. If \code{bag.fraction}<1 then running the same model twice will result in similar but different fits. 
\code{gbm} uses the R random number generator so \code{set.seed} can ensure that the model can be reconstructed. Preferably, the user can save the returned \code{\link{gbm.object}} using \code{\link{save}}.} \item{train.fraction}{The first \code{train.fraction * nrows(data)} observations are used to fit the \code{gbm} and the remainder are used for computing out-of-sample estimates of the loss function.} \item{nTrain}{An integer representing the number of cases on which to train. This is the preferred way of specification for \code{gbm.fit}; the option \code{train.fraction} in \code{gbm.fit} is deprecated and only maintained for backward compatibility. These two parameters are mutually exclusive. If both are unspecified, all data is used for training.} \item{keep.data}{a logical variable indicating whether to keep the data and an index of the data stored with the object. Keeping the data and index makes subsequent calls to \code{\link{gbm.more}} faster at the cost of storing an extra copy of the dataset.} \item{object}{a \code{gbm} object created from an initial call to \code{\link{gbm}}.} \item{n.new.trees}{the number of additional trees to add to \code{object}.} \item{verbose}{If TRUE, gbm will print out progress and performance indicators. If this option is left unspecified for gbm.more then it uses \code{verbose} from \code{object}.} \item{class.stratify.cv}{whether or not the cross-validation should be stratified by class. Defaults to \code{TRUE} for \code{distribution="multinomial"} and is only implemented for \code{multinomial} and \code{bernoulli}. The purpose of stratifying the cross-validation is to help avoid situations in which training sets do not contain all classes.} \item{x, y}{For \code{gbm.fit}: \code{x} is a data frame or data matrix containing the predictor variables and \code{y} is the vector of outcomes. The number of rows in \code{x} must be the same as the length of \code{y}.} \item{offset}{a vector of values for the offset} \item{misc}{For \code{gbm.fit}: \code{misc} is an R object that is simply passed on to the gbm engine. It can be used for additional data for the specific distribution. Currently it is only used for passing the censoring indicator for the Cox proportional hazards model.} \item{w}{For \code{gbm.fit}: \code{w} is a vector of weights of the same length as \code{y}.} \item{var.names}{For \code{gbm.fit}: A vector of strings of length equal to the number of columns of \code{x} containing the names of the predictor variables.} \item{response.name}{For \code{gbm.fit}: A character string label for the response variable.} \item{group}{\code{group} used when \code{distribution = 'pairwise'.}} \item{n.cores}{The number of CPU cores to use. The cross-validation loop will attempt to send different CV folds off to different cores. If \code{n.cores} is not specified by the user, it is guessed using the \code{detectCores} function in the \code{parallel} package. Note that the documentation for \code{detectCores} makes clear that it is not fail-safe and could return a spurious number of available cores.} } \details{See the \href{../doc/gbm.pdf}{gbm vignette} for technical details. This package implements the generalized boosted modeling framework. Boosting is the process of iteratively adding basis functions in a greedy fashion so that each additional basis function further reduces the selected loss function. This implementation closely follows Friedman's Gradient Boosting Machine (Friedman, 2001).
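% Schematically, iteration t fits a regression tree to the current loss
% gradient and takes a shrunken step: f_t(x) = f_{t-1}(x) + shrinkage * tree_t(x).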
In addition to many of the features documented in the Gradient Boosting Machine, \code{gbm} offers additional features including the out-of-bag estimator for the optimal number of iterations, the ability to store and manipulate the resulting \code{gbm} object, and a variety of other loss functions that had not previously had associated boosting algorithms, including the Cox partial likelihood for censored data, the Poisson likelihood for count outcomes, and a gradient boosting implementation to minimize the AdaBoost exponential loss function.

\code{gbm.fit} provides the link between R and the C++ gbm engine. \code{gbm} is a front-end to \code{gbm.fit} that uses the familiar R modeling formulas. However, \code{\link[stats]{model.frame}} is very slow with many predictor variables, so power users with many variables may prefer \code{gbm.fit}. For general practice \code{gbm} is preferable.}

\value{
\code{gbm}, \code{gbm.fit}, and \code{gbm.more} return a \code{\link{gbm.object}}.
}

\references{
Y. Freund and R.E. Schapire (1997). \dQuote{A decision-theoretic generalization of on-line learning and an application to boosting,} \emph{Journal of Computer and System Sciences,} 55(1):119-139.

G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science and Statistics} 31:172-181.

J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic Regression: a Statistical View of Boosting,} \emph{Annals of Statistics} 28(2):337-374.

J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.

J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,} \emph{Computational Statistics and Data Analysis} 38(4):367-378.

B. Kriegler (2007). \href{http://statistics.ucla.edu/theses/uclastat-dissertation-2007:2}{Cost-Sensitive Stochastic Gradient Boosting Within a Quantitative Regression Framework}. PhD dissertation, UCLA Statistics.

C. Burges (2010). \dQuote{From RankNet to LambdaRank to LambdaMART: An Overview,} Microsoft Research Technical Report MSR-TR-2010-82.

\href{http://sites.google.com/site/gregridgeway}{Greg Ridgeway's site}.

The \href{http://www-stat.stanford.edu/~jhf/R-MART.html}{MART} website.
}

\author{Greg Ridgeway \email{gregridgeway@gmail.com}

Quantile regression code developed by Brian Kriegler \email{bk@stat.ucla.edu}

t-distribution and multinomial code developed by Harry Southworth and Daniel Edwards

Pairwise code developed by Stefan Schroedl \email{schroedl@a9.com}}

\seealso{
\code{\link{gbm.object}}, \code{\link{gbm.perf}}, \code{\link{plot.gbm}}, \code{\link{predict.gbm}}, \code{\link{summary.gbm}}, \code{\link{pretty.gbm.tree}}.
}
\examples{
# A least squares regression example
# create some data
N <- 1000
X1 <- runif(N)
X2 <- 2*runif(N)
X3 <- ordered(sample(letters[1:4],N,replace=TRUE),levels=letters[4:1])
X4 <- factor(sample(letters[1:6],N,replace=TRUE))
X5 <- factor(sample(letters[1:3],N,replace=TRUE))
X6 <- 3*runif(N)
mu <- c(-1,0,1,2)[as.numeric(X3)]

SNR <- 10 # signal-to-noise ratio
Y <- X1**1.5 + 2 * (X2**.5) + mu
sigma <- sqrt(var(Y)/SNR)
Y <- Y + rnorm(N,0,sigma)

# introduce some missing values
X1[sample(1:N,size=500)] <- NA
X4[sample(1:N,size=300)] <- NA

data <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6)

# fit initial model
gbm1 <- gbm(Y~X1+X2+X3+X4+X5+X6,     # formula
    data=data,                       # dataset
    var.monotone=c(0,0,0,0,0,0),     # -1: monotone decrease,
                                     # +1: monotone increase,
                                     #  0: no monotone restrictions
    distribution="gaussian",         # see the help for other choices
    n.trees=1000,                    # number of trees
    shrinkage=0.05,                  # shrinkage or learning rate,
                                     # 0.001 to 0.1 usually work
    interaction.depth=3,             # 1: additive model,
                                     # 2: two-way interactions, etc.
    bag.fraction = 0.5,              # subsampling fraction, 0.5 is probably best
    train.fraction = 0.5,            # fraction of data for training,
                                     # first train.fraction*N used for training
    n.minobsinnode = 10,             # minimum total weight needed in each node
    cv.folds = 3,                    # do 3-fold cross-validation
    keep.data=TRUE,                  # keep a copy of the dataset with the object
    verbose=FALSE,                   # don't print out progress
    n.cores=1)                       # use only a single core (detecting #cores
                                     # is error-prone, so avoided here)

# check performance using an out-of-bag estimator
# OOB underestimates the optimal number of iterations
best.iter <- gbm.perf(gbm1,method="OOB")
print(best.iter)

# check performance using a 50\% held-out test set
best.iter <- gbm.perf(gbm1,method="test")
print(best.iter)

# check performance using 3-fold cross-validation
best.iter <- gbm.perf(gbm1,method="cv")
print(best.iter)

# plot the performance
# plot variable influence
summary(gbm1,n.trees=1)         # based on the first tree
summary(gbm1,n.trees=best.iter) # based on the estimated best number of trees

# compactly print the first and last trees for curiosity
print(pretty.gbm.tree(gbm1,1))
print(pretty.gbm.tree(gbm1,gbm1$n.trees))

# make some new data
N <- 1000
X1 <- runif(N)
X2 <- 2*runif(N)
X3 <- ordered(sample(letters[1:4],N,replace=TRUE))
X4 <- factor(sample(letters[1:6],N,replace=TRUE))
X5 <- factor(sample(letters[1:3],N,replace=TRUE))
X6 <- 3*runif(N)
mu <- c(-1,0,1,2)[as.numeric(X3)]

Y <- X1**1.5 + 2 * (X2**.5) + mu + rnorm(N,0,sigma)

data2 <- data.frame(Y=Y,X1=X1,X2=X2,X3=X3,X4=X4,X5=X5,X6=X6)

# predict on the new data using "best" number of trees
# f.predict generally will be on the canonical scale (logit,log,etc.)
f.predict <- predict(gbm1,data2,best.iter)

# least squares error
print(sum((data2$Y-f.predict)^2))
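# A hedged editorial aside (not part of the original example): predictions
# can also be requested on the outcome scale via the 'type' argument added
# in gbm 1.1 (see CHANGES); for this gaussian fit the link and response
# scales coincide, so the call below is shown for illustration only
# p.predict <- predict(gbm1, data2, n.trees=best.iter, type="response")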
# create marginal plots
# plot variable X1,X2,X3 after "best" iterations
par(mfrow=c(1,3))
plot(gbm1,1,best.iter)
plot(gbm1,2,best.iter)
plot(gbm1,3,best.iter)
par(mfrow=c(1,1))

# contour plot of variables 1 and 2 after "best" iterations
plot(gbm1,1:2,best.iter)

# lattice plot of variables 2 and 3
plot(gbm1,2:3,best.iter)

# lattice plot of variables 3 and 4
plot(gbm1,3:4,best.iter)

# 3-way plots
plot(gbm1,c(1,2,6),best.iter,cont=20)
plot(gbm1,1:3,best.iter)
plot(gbm1,2:4,best.iter)
plot(gbm1,3:5,best.iter)

# do another 100 iterations
gbm2 <- gbm.more(gbm1,100,
                 verbose=FALSE) # stop printing detailed progress
}
\keyword{models}
\keyword{nonlinear}
\keyword{survival}
\keyword{nonparametric}
\keyword{tree}

gbm/man/gbm-internal.Rd

\name{gbm-internal}
\alias{guessDist}
\alias{getStratify}
\alias{getCVgroup}
\alias{checkMissing}
\alias{checkID}
\alias{checkWeights}
\alias{checkOffset}
\alias{getVarNames}
\alias{gbmCluster}
\title{gbm internal functions}
\description{Helper functions for preprocessing data prior to building the model}
\usage{
guessDist(y)
getCVgroup(distribution, class.stratify.cv, y, i.train, cv.folds, group)
getStratify(strat, d)
checkMissing(x, y)
checkID(id)
checkWeights(w, n)
checkOffset(o, y)
getVarNames(x)
gbmCluster(n)
}
\arguments{
\item{y}{The response variable}
\item{d, distribution}{The distribution, either specified by the user or implied}
\item{class.stratify.cv}{Whether or not to stratify, if provided by the user}
\item{i.train}{Computed internally by \code{gbm}}
\item{group}{The group, if using \code{distribution='pairwise'}}
\item{strat}{Whether or not to stratify}
\item{cv.folds}{The number of cross-validation folds}
\item{x}{The design matrix}
\item{id}{The interaction depth}
\item{w}{The weights}
\item{n}{The number of cores to use in the cluster.}
\item{o}{The offset}
% \item{verbose}{Whether or not to print output to screen}
% \item{X, var.monotone, n.trees, n.minobsinnode, shrinkage, bag.fraction,
%   var.names, response.name, cv.group}{Arguments passed
%   through to gbm.fit}
} % Close \arguments
\details{
These are functions used internally by \code{gbm} and not intended for
direct use by the user.
}

gbm/man/basehaz.gbm.Rd

\name{basehaz.gbm}
\alias{basehaz.gbm}
\title{Baseline hazard function}
\description{Computes the Breslow estimator of the baseline hazard function
for a proportional hazard regression model.}
\usage{
basehaz.gbm(t, delta, f.x, t.eval = NULL, smooth = FALSE, cumulative = TRUE)
}
\arguments{
\item{t}{the survival times}
\item{delta}{the censoring indicator}
\item{f.x}{the predicted values of the regression model on the log hazard scale}
\item{t.eval}{values at which the baseline hazard will be evaluated}
\item{smooth}{if \code{TRUE} \code{basehaz.gbm} will smooth the estimated
baseline hazard using Friedman's super smoother \code{\link{supsmu}}}
\item{cumulative}{if \code{TRUE} the cumulative hazard function will be
computed}
}
\details{
The proportional hazard model assumes h(t|x)=lambda(t)*exp(f(x)).
\code{\link{gbm}} can estimate the f(x) component via partial likelihood.
After estimating f(x), \code{basehaz.gbm} can compute a nonparametric
estimate of lambda(t).
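A minimal sketch of the intended workflow follows; the data frame
\code{d} (with survival time \code{time}, censoring indicator
\code{status}, and predictors \code{x1}, \code{x2}) and the choice of
500 trees are hypothetical assumptions for illustration:

\preformatted{
library(survival)
# fit the f(x) component by boosted Cox partial likelihood
fit <- gbm(Surv(time, status) ~ x1 + x2, data = d,
           distribution = "coxph", n.trees = 500)
f.x <- predict(fit, d, n.trees = 500)   # log hazard scale
# Breslow estimate of the (cumulative) baseline hazard
H0 <- basehaz.gbm(d$time, d$status, f.x,
                  t.eval = sort(unique(d$time)),
                  cumulative = TRUE)
}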
}
\value{
a vector of length equal to the length of \code{t} (or of length
\code{t.eval} if \code{t.eval} is not \code{NULL}) containing the baseline
hazard evaluated at \code{t} (or at \code{t.eval} if \code{t.eval} is not
\code{NULL}). If \code{cumulative} is set to \code{TRUE} then the returned
vector evaluates the cumulative hazard function at those values.
}
\references{
N. Breslow (1972). "Discussion of `Regression Models and Life-Tables' by D.R. Cox," \emph{Journal of the Royal Statistical Society, Series B,} 34(2):216-217.

N. Breslow (1974). "Covariance analysis of censored survival data," \emph{Biometrics} 30:89-99.
}
\author{Greg Ridgeway \email{gregridgeway@gmail.com}}
\seealso{\code{\link[survival]{survfit}}, \code{\link{gbm}}}
\keyword{methods}
\keyword{survival}

gbm/man/gbm-package.Rd

\name{gbm-package}
\alias{gbm-package}
\docType{package}
\title{Generalized Boosted Regression Models}
\description{This package implements extensions to Freund and Schapire's
AdaBoost algorithm and J. Friedman's gradient boosting machine. Includes
regression methods for least squares, absolute loss, logistic, Poisson,
Cox proportional hazards partial likelihood, multinomial, t-distribution,
AdaBoost exponential loss, Learning to Rank, and Huberized hinge loss.}
\details{
\tabular{ll}{
Package: \tab gbm\cr
Version: \tab 2.1\cr
Date: \tab 2013-05-10\cr
Depends: \tab R (>= 2.9.0), survival, lattice, mgcv\cr
License: \tab GPL (version 2 or newer)\cr
URL: \tab http://code.google.com/p/gradientboostedmodels/\cr
}

Index:
\preformatted{
basehaz.gbm         Baseline hazard function
calibrate.plot      Calibration plot
gbm                 Generalized Boosted Regression Modeling
gbm.object          Generalized Boosted Regression Model Object
gbm.perf            GBM performance
plot.gbm            Marginal plots of fitted gbm objects
predict.gbm         Predict method for GBM Model Fits
pretty.gbm.tree     Print gbm tree components
quantile.rug        Quantile rug plot
relative.influence  Methods for estimating relative influence
shrink.gbm          L1 shrinkage of the predictor variables in a GBM
shrink.gbm.pred     Predictions from a shrunken GBM
summary.gbm         Summary of a gbm object
}

Further information is available in the following vignettes:
\tabular{ll}{
\code{gbm} \tab Generalized Boosted Models: A guide to the gbm package (source, pdf)\cr
}
} % Close \details
\author{
Greg Ridgeway \email{gregridgeway@gmail.com} with contributions by
Daniel Edwards, Brian Kriegler, Stefan Schroedl and Harry Southworth.
}
\references{
Y. Freund and R.E. Schapire (1997). \dQuote{A decision-theoretic generalization of on-line learning and an application to boosting,} \emph{Journal of Computer and System Sciences,} 55(1):119-139.

G. Ridgeway (1999). \dQuote{The state of boosting,} \emph{Computing Science and Statistics} 31:172-181.

J.H. Friedman, T. Hastie, R. Tibshirani (2000). \dQuote{Additive Logistic Regression: a Statistical View of Boosting,} \emph{Annals of Statistics} 28(2):337-374.

J.H. Friedman (2001). \dQuote{Greedy Function Approximation: A Gradient Boosting Machine,} \emph{Annals of Statistics} 29(5):1189-1232.

J.H. Friedman (2002). \dQuote{Stochastic Gradient Boosting,} \emph{Computational Statistics and Data Analysis} 38(4):367-378.

The \href{http://www-stat.stanford.edu/~jhf/R-MART.html}{MART} website.
} % Close \references
\keyword{package}

gbm/.Rinstignore

inst/doc/gbm.tex
inst/doc/srcltx.sty
inst/doc/shrinkage-v-iterations.eps
inst/doc/shrinkage-v-iterations.pdf
inst/doc/oobperf2.eps
inst/doc/oobperf2.pdf
inst/doc/shrinkageplot.R

gbm/LICENSE

Generalized Boosted Regression package for the R environment
Copyright (C) 2003 Greg Ridgeway

This program is free software; you can redistribute it and/or modify it
under the terms of the GNU General Public License as published by the
Free Software Foundation; either version 2 of the License, or (at your
option) any later version.

This program is distributed in the hope that it will be useful, but
WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
General Public License for more details.

Copies of the relevant licenses can be found at:
http://www.r-project.org/Licenses/

gbm/CHANGES

Changes in version 2.1

- The cross-validation loop is now parallelized. The functions attempt to
  guess a sensible number of cores to use, or the user can specify how
  many through the new argument n.cores.
- A fair amount of code refactoring.
- Added type='response' for predict when distribution='adaboost'.
- Fixed a bug that caused offset not to be used if the first element of
  offset was 0.
- Updated predict.gbm and plot.gbm to cope with objects created using
  gbm version 1.6.
- Changed default value of verbose to 'CV'. gbm now defaults to letting
  the user know which block of CV folds it is running. If verbose=TRUE
  is specified, the final run of the model also prints its progress to
  screen as in earlier versions.
- Fixed bug that caused predict to return the wrong result when
  distribution == 'multinomial' and length(n.trees) > 1.
- Fixed bug that caused n.trees to be wrong in relative.influence if no
  CV or validation set was used.
- Relative influence was computed wrongly when
  distribution="multinomial". Fixed.
- Cross-validation predictions are now included in the output object.
- Fixed bug in relative.influence that caused labels to be wrong when
  sort.=TRUE.
- Modified interact.gbm to do an additional sanity check; updated the
  help file.
- Fixed bug in interact.gbm so that it now works for
  distribution="multinomial".
- Modified predict.gbm to improve performance on large datasets.

Changes in version 2.0

Lots of new features added, so it warrants a change to the first digit
of the version number.

Major changes:

- Several new distributions are now available thanks to Harry Southworth
  and Daniel Edwards: multinomial and tdist.
- New distribution 'pairwise' for Learning to Rank applications
  (LambdaMART), including four different ranking measures, thanks to
  Stefan Schroedl.
- The gbm package is now managed on R-Forge by Greg Ridgeway and Harry
  Southworth. Visit http://r-forge.r-project.org/projects/gbm/ to get
  the latest or to contribute to the package.

Minor changes:

- The "quantile" distribution now handles weighted data.
- relative.influence changed to give names to the returned vector.
- Added print.gbm and show.gbm. These give basic summaries of the fitted
  model.
- Added support function reconstructGBMdata() to facilitate
  reconstituting the data for certain plots and summaries.
- gbm was not using the weights when using cross-validation due to a bug.
  That's been fixed (thanks to Trevor Hastie for catching this).
- predict.gbm now tries to guess the number of trees, and defaults to
  using the training data if no newdata is given.
- relative.influence has 2 new arguments, scale. and sort., that default
  to FALSE. The returned vector now has names.
- gbm now tries to guess what distribution you meant if you didn't
  specify one.
- gbm has a new argument, class.stratify.cv, to control whether
  cross-validation is stratified by class when distribution is
  "bernoulli" or "multinomial". Defaults to TRUE for multinomial, FALSE
  for bernoulli. The purpose is to avoid unusable training sets.
- gbm.perf now puts a vertical line at the best number of trees when
  method = "cv" or "test". It tries to guess what method you meant if
  you don't tell it.
- .First.lib had a bug that would crash gbm if gbm was installed as a
  local library. Fixed.
- plot.gbm has a new argument, type, defaulting to "link". For
  bernoulli, multinomial, and poisson, "response" is allowed.
- Models with large interactions (>24) were using up all the terminal
  nodes in the stack. The stack has been increased to 101 nodes,
  allowing interaction.depth up to 49. A more graceful error is now
  issued if interaction.depth exceeds 49. (Thanks to Tom Dietterich for
  catching this.)
- gbm now uses the R macro R_NaN in the C++ code rather than NAN, which
  would not compile on Sun OS.
- If covariates marked missing values with NaN instead of NA, the model
  fit would not be consistent. (Thanks to JR Lockwood for noting this.)

Changes in version 1.6

- Quantile regression is now available thanks to a contribution from
  Brian Kriegler. Use list(name="quantile",alpha=0.05) as the
  distribution parameter to construct a predictor of the 5% quantile of
  the conditional distribution.
- gbm() now stores cv.folds in the returned gbm object.
- Added a normalize parameter to summary.gbm that allows one to choose
  whether or not to normalize the variable influence to sum to 100.
- Corrected a minor bug in plot.gbm that put the wrong variable label on
  the x axis when plotting a numeric variable and a factor variable.
- The C function gbm_plot can now handle missing values. This does not
  affect the R function plot.gbm(), but it makes gbm_plot potentially
  more useful for computing partial dependence plots.
- mgcv is no longer a required package, but the splines package is
  needed for calibrate.plot().
- Minor changes for compatibility with R 2.6.0 (thanks to Seth Falcon).
- Corrected a bug in the Cox model computation when all terminal nodes
  had exactly the minimum number of observations permitted, which caused
  gbm and R to crash ungracefully. This was likely to occur with small
  datasets (thanks to Brian Ring).
- Corrected a bug in Laplace that always made the terminal node
  predictions slightly larger than the median. Corrected again in a
  minor release due to a bug caught by Jon McAuliffe.
- Corrected a bug in interact.gbm that caused it to crash for factors.
  Caught by David Carslaw.
- Added a plot of cross-validated error to the plots generated by
  gbm.perf.

Changes in version 1.5

- gbm would fail if there was only one x. Now drop=FALSE is set in all
  data.frame subsetting (thanks to Gregg Keller for noticing this).
- Corrected gbm.perf() to check if bag.fraction=1 and skip trying to
  create the OOB plots and estimates.
- Corrected a typo in the vignette specifying the gradient for the Cox
  model.
- Fixed the OOB-reps.R demo. For non-Gaussian cases it was maximizing
  the deviance rather than minimizing it.
- Increased the largest factor variable allowed from 256 levels to 1024
  levels. gbm stops if any factor variable exceeds 1024 levels. Will try
  to make this cleaner in the future.
- predict.gbm now allows n.trees to be a vector and efficiently computes
  predictions for each indicated model. This avoids having to call
  predict.gbm several times for different choices of n.trees.
- Fixed a bug that occurred when using cross-validation for coxph. The
  code was computing length(y) when y is a Surv object, which returns
  2*N rather than N. This generated out-of-range indices for the
  training dataset.
- Changed the method for extracting the name of the outcome variable to
  work around a change in terms.formula() when using "." in formulas.

Changes in version 1.4

- The formula interface now allows "-x" to indicate not including
  certain variables in the model fit.
- Fixed the formula interface to allow offset(). The offset argument has
  now been removed from gbm().
- Added basehaz.gbm, which computes the Breslow estimate of the baseline
  hazard. At a later stage this will be substituted with a call to
  survfit, which is much more general, handling not only left-censored
  data.
- The OOB estimator is known to be conservative. A warning is now issued
  when using method="OOB" and there is no longer a default method for
  gbm.perf().
- cv.folds is now an option to gbm and method="cv" is an option for
  gbm.perf. Performs v-fold cross-validation for estimating the optimal
  number of iterations.
- There is now a package vignette with details on the user options and
  the mathematics behind the gbm engine.

Changes in version 1.3

- All likelihood-based loss functions are now in terms of deviance
  (-2*log likelihood). As a result, gbm always minimizes the loss.
  Previous versions minimized losses for some choices of distribution
  and maximized a likelihood for other choices.
- Fixed the Poisson regression to avoid predicting +/- infinity, which
  occurs when a terminal node has only observations with y=0. The
  largest predicted value is now +/-19, similar to what glm predicts for
  these extreme cases for linear Poisson regression. The shrinkage
  factor will be applied to the -19 predictions, so it will take
  1/shrinkage gbm iterations locating pure terminal nodes before gbm
  would actually return a predicted value of +/-19.
- Introduces shrink.gbm.pred(), which does a lasso-style variable
  selection. Consider this function as still in an experimental phase.
- Bug fix in plot.gbm.
- All calls to ISNAN now call ISNA (avoids using isnan).

Changes in version 1.2

- Fixed the gbm.object help file and updated the function to check for
  missing values to the latest R standard.
- plot.gbm now allows i.var to be the names of the variables to plot or
  the index of the variables used.
- gbm now requires the "stats" package, into which "modreg" has been
  merged.
- Documentation for predict.gbm corrected.

Changes in version 1.1

- All calculations of loss functions now compute averages rather than
  totals. That is, all performance measures (text of progress, gbm.perf)
  now report average log-likelihood rather than total log-likelihood
  (e.g. mean squared error rather than sum of squared error). A slight
  exception applies to distribution="coxph". For these models the
  averaging pertains only to the uncensored observations. The
  denominator is sum(w[i]*delta[i]) rather than the usual sum(w[i]).
- summary.gbm now has an experimental "method" argument. The default
  computes the relative influence as before. The option
  "method=permutation.test.gbm" performs a permutation test for the
  relative influence.
  Give it a try and let me know how it works. It currently is not
  implemented for "distribution=coxph".
- Added gbm.fit, a function that avoids the model.frame call, which is
  tragically slow with lots of variables. gbm is now just a
  formula/model.frame wrapper for the gbm.fit function. (Based on a
  suggestion and code from Jim Garrett.)
- Corrected a bug in the use of offsets. Now the user must pass the
  offset vector with the offset argument rather than in the formula.
  Previously, offsets were being used once as offsets and a second time
  as a predictor.
- predict.gbm now has a single.tree option. When set to TRUE the
  function will return predictions from only that tree. The idea is that
  this may be useful for reweighting the trees using a post-model-fit
  adjustment.
- Corrected a bug in CPoisson::BagImprovement that incorrectly computed
  the bagged estimate of improvement.
- Corrected a bug for distribution="coxph" in gbm() and gbm.more(). If
  there was a single predictor the functions would drop the unused array
  dimension, issuing an error.
- Corrected gbm() with distribution="coxph" when train.fraction=1.0. The
  program would set two non-existent observations in the validation set
  and issue a warning.
- If a predictor variable has no variation a warning (rather than an
  error) is now issued.
- Updated the documentation for calibrate.plot to match the
  implementation.
- Changed some of the default values in gbm(): bag.fraction=0.5,
  train.fraction=1.0, and shrinkage=0.001.
- Corrected a bug in predict.gbm. The C code producing the predictions
  would go into an infinite loop if predicting an observation with a
  level of a categorical variable not seen in the training dataset. Now
  the routine uses the missing value prediction. (Feng Zeng)
- Added a "type" parameter to predict.gbm. The default ("link") is the
  same as before: predictions are on the canonical scale (gradient
  scale). The new option ("response") converts back to the same scale as
  the outcome (probability for bernoulli, mean for gaussian, etc.).
- gbm and gbm.more now have verbose options which can be set to FALSE to
  suppress the progress and performance indicators. (Several users
  requested this nice feature.)
- gbm.perf no longer prints out verbose information about the best
  iteration estimate. It simply returns the estimate and creates the
  plots if requested.
- ISNAN: since R 1.8.0, R.h changed declarations for ISNAN(). These
  changes broke gbm 1.0. I added the following code to buildinfo.h to
  fix this:

      #ifdef IEEE_754
      #undef ISNAN
      #define ISNAN(x) R_IsNaNorNA(x)
      #endif

  Seems to work now but I'll look for a more elegant solution.

Changes in version 0.8

- Additional documentation about the loss functions, graphics, and
  methods is now available with the package.
- Fixed the initial value for the adaboost exponential loss. Prior to
  version 0.8 the initial value was 0.0; it is now half the baseline
  log-odds.
- Changes in some headers and #define's to compile under gcc 3.2 (Brian
  Ripley).

Changes in version 0.7

- In gbm.perf, the argument named best.iter.calc has been renamed
  "method" for greater simplicity.
- All entries in the design matrix are now coerced to doubles (thanks to
  Bonnie Ghosh).
- gbm now checks that all predictors are either numeric, ordinal, or
  factor.
- summary.gbm now reports the correct relative influence when some
  variables do not enter the model. (Thanks to Hugh Chipman.)
- Renamed several #define'd variables in buildinfo.h so they do not
  conflict with standard winerror.h names.

Planned future changes

1. Add weighted median functionality to Laplace.
2. Automate the fitting process, i.e., selecting the shrinkage and
   number of iterations.
3. Add an overlay factor*continuous predictor plot as an option rather
   than lattice plots.
4. Add multinomial and ordered logistic regression procedures.

Thanks to

RAND for sponsoring the development of this software through statistical
methods funding.
Kurt Hornik, Brian Ripley, and Jan De Leeuw for helping me get gbm up to
the R standard and into CRAN.
Dan McCaffrey for testing and evangelizing the utility of this program.
Bonnie Ghosh for finding bugs.
Arnab Mukherji for testing and suggesting new features.
Daniela Golinelli for finding bugs and marrying me.
Andrew Morral for suggesting improvements and finding new applications
of the method in the evaluation of drug treatment programs.
Katrin Hambarsoomians for finding bugs.
Hugh Chipman for finding bugs.
Jim Garrett for many suggestions and contributions.