party/0000755000176200001440000000000014124331102011400 5ustar liggesusersparty/NAMESPACE0000644000176200001440000000447413436244220012641 0ustar liggesusers importFrom("coin", "logrank_trafo", "trafo", "id_trafo", "of_trafo") importFrom("survival", "Surv", "survfit", "is.Surv") importFrom("modeltools", "ModelEnvFormula", "ParseFormula", "linearModel", "glinearModel", "survReg") importMethodsFrom("modeltools", "dimension", "dpp", "fit", "na.omit", "na.pass") importFrom("methods", "extends", "getSlots", "new", "validObject", "initialize", "show") import("zoo") import("strucchange") import("sandwich") import("grid") import("mvtnorm") importFrom("grDevices", "gray.colors") importFrom("graphics", "barplot", "boxplot", "hist", "lines", "points") importFrom("stats", "weights", "approx", "as.formula", "coef", "complete.cases", "density", "deviance", "fitted", "logLik", "model.matrix", "pchisq", "predict", "qnorm", "quantile", "residuals", "runif", "weighted.mean") importFrom("utils", "head") export(ctree, ctree_control, node_inner, edge_simple, node_terminal, node_surv, node_barplot, node_boxplot, node_hist, node_density, node_scatterplot, node_bivplot, where, nodes, conditionalTree, treeresponse, response, ptrafo, cforest, mob, mob_control, reweight, sctest.mob, cforest_control, varimp, varimpAUC, cforest_classical, cforest_unbiased, proximity, prettytree, party_intern) exportClasses("BinaryTree", "LearningSample", "SplittingNode", "TerminalNode", "TreeControl", "RandomForest", "mob", "ForestControl", "TerminalModelNode") exportMethods("initialize", "fit", "initVariableFrame") S3method(print, BinaryTree) S3method(print, nominalSplit) S3method(print, orderedSplit) S3method(print, SplittingNode) S3method(print, TerminalNode) S3method(print, TerminalModelNode) S3method(print, RandomForest) S3method(plot, BinaryTree) S3method(predict, BinaryTree) S3method(predict, RandomForest) S3method(weights, BinaryTree) S3method(weights, RandomForest) S3method(coef, mob) S3method(deviance, mob) S3method(fitted, mob) S3method(logLik, mob) S3method(plot, mob) S3method(predict, mob) S3method(print, mob) S3method(residuals, mob) S3method(sctest, mob) S3method(summary, mob) S3method(weights, mob) S3method(reweight, glinearModel) S3method(reweight, linearModel) S3method(reweight, survReg) useDynLib(party, .registration = TRUE) party/demo/0000755000176200001440000000000012427624277012352 5ustar liggesusersparty/demo/strucchange-perm.R0000644000176200001440000001735312231152401015727 0ustar liggesusers################### ## Preliminaries ## ################### ## packages library("coin") library("strucchange") library("lattice") ## random seed rseed <- 20061103 ## theme for lattice/trellis graphics trellis.par.set(theme = canonical.theme(color = FALSE)) ###################### ## Boston homicides ## ###################### ## time series plot data("BostonHomicide", package = "strucchange") hom_month <- zoo(as.vector(BostonHomicide$homicides), as.yearmon(as.vector(time(BostonHomicide$homicides)))) hom_year <- aggregate(hom_month, function(x) as.numeric(floor(x)), mean) plot(hom_month, col = grey(0.7), lwd = 2, ylab = "Number of homicides", xlab = "Time") lines(hom_year, type = "b") ## monthly data hom_month <- data.frame( log_homicides = log(coredata(hom_month) + 0.5), time = as.numeric(time(hom_month))) ## asymptotic unconditional test sctest(gefp(log_homicides ~ 1, data = hom_month), functional = supLM(0.1)) ## asymptotic conditional test set.seed(rseed) maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1) 
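## a minimal sketch (not part of the original demo): the test object
## returned by maxstat_test() can be queried with coin's accessor
## functions; the name `hom_mt' is illustrative
hom_mt <- maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1)
pvalue(hom_mt)                       ## conditional p-value
statistic(hom_mt, "standardized")    ## maximally selected statistics per cutpoint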
## approximate conditional test set.seed(rseed) maxstat_test(log_homicides ~ time, data = hom_month, minprob = 0.1, distribution = approximate(9999)) ## annual data hom_year <- data.frame( homicides = coredata(hom_year), time = time(hom_year)) ## asymptotic unconditional test sctest(gefp(homicides ~ 1, data = hom_year), functional = supLM(0.1)) ## asymptotic conditional test set.seed(rseed) maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1) ## approximate conditional test set.seed(rseed) maxstat_test(homicides ~ time, data = hom_year, minprob = 0.1, distribution = approximate(9999)) ## note that the manuscript computes the exact conditional p-value ## by exhaustive search (rather than approximating via 10,000 simulations) ########################### ## Hiring discrimination ## ########################### ## data hire <- cbind(c(2, 0, 0, 0, 5, 14), c(427, 86, 104, 180, 111, 59)) hire <- data.frame( resp = factor(unlist(sapply(1:nrow(hire), function(i) rep(c("female", "male"), hire[i,])))), time = as.numeric(rep(1991:1996, rowSums(hire)))) ## visualization set.seed(rseed) maxstat_test(resp ~ time, data = hire, distribution = approximate(9999), minprob = 0.05) ################ ## CO2 reflux ## ################ ## data data("CWD", package = "coin") lwood <- reshape(CWD, varying = paste("sample", c(2:4,6:8), sep = ""), timevar = "tree", direction = "long", v.names = "sample") lwood$tree <- factor(lwood$tree) ## visualization print(xyplot(sample ~ time | tree, data = lwood, type = "b", scales = list(y = list(relation = "free")), layout = c(3, 2), xlab = "Time", ylab = expression(paste(CO[2], " reflux")), as.table = TRUE)) ## test (cwd_mt <- maxstat_test(sample2 + sample3 + sample4 + sample6 + sample7 + sample8 ~ trend, data = CWD, distribution = approximate(1e5))) ## maximally selected statistics with 5% critical value cwd_st <- statistic(cwd_mt, "standardized") cwd_st <- data.frame(cwd_st, time = CWD$time[1] + CWD$trend[2:11]) cwd_st <- reshape(cwd_st, varying = paste("sample", c(2:4,6:8), sep = ""), timevar = "tree", direction = "long", v.names = "sample") cwd_st$tree <- factor(cwd_st$tree) cwd_q <- qperm(cwd_mt, 0.95) print(xyplot(sample ~ time | tree, data = cwd_st, type = "b", panel = function(...) { panel.xyplot(...) 
panel.abline(h = c(-cwd_q, cwd_q), col = 2) panel.abline(h = 0, col = "gray") }, layout = c(3, 2), xlab = "Time", ylab = "Test statistics", ylim = c(-3.5, 3.5), as.table = TRUE)) ################################## ## Dow Jones Industrial Average ## ################################## ## data data("DJIA", package = "strucchange") djia <- diff(log(DJIA)) * 100 djia_res <- coredata(djia - mean(djia)) djia_trafo <- cbind(Intercept = djia_res, Variance = djia_res^2 - mean(djia_res^2)) djia_time <- time(djia) ## visualization plot(djia, xlab = "Time", ylab = "Dow Jones stock returns") ## test set.seed(rseed) djia_mt <- maxstat_test(djia_trafo ~ as.numeric(djia_time), distribution = approximate(9999), minprob = 0.1) ## maximally selected statistics apply(abs(statistic(djia_mt, "standardized")), 2, max) ## critical values qperm(djia_mt, 0.95) ## breakpoint djia_point <- djia_time[floor(length(djia) * 0.1) + which.max(abs(statistic(djia_mt, "standardized")[,2]))] ## autocorrelation djia_pre <- window(djia, end = djia_point) djia_post <- window(djia, start = djia_point + 1) ar1 <- function(x, digits = 3) { x <- coredata(x) x <- x - mean(x) c(acf(x, plot = FALSE)$acf[2], acf(x^2, plot = FALSE)$acf[2]) } ar1(djia_pre) ar1(djia_post) ####################### ## Economic journals ## ####################### ## data data("Journals", package = "AER") Journals$age <- 2000 - Journals$foundingyear Journals <- Journals[order(Journals$age),] ## model in root node jour_lm <- lm(log(subs) ~ log(price/citations), data = Journals) ## test in root node set.seed(rseed) (jour_mt <- maxstat_test(estfun(jour_lm) ~ age, data = Journals, distribution = approximate(9999), minprob = 0.1)) ## test information jour_critval <- qperm(jour_mt, 0.95^(1/4)) jour_process <- statistic(jour_mt, "standardized") jour_process <- zoo(jour_process, as.numeric(sapply(strsplit(rownames(jour_process), "x <= "), tail, 1))) colnames(jour_process) <- c("Intercept", "Slope") jour_point <- time(jour_process)[apply(coredata(abs(jour_process)), 2, which.max)] ## visualization of test in root node mypanel <- function(x, y, subscripts, groups, panel = panel.xyplot, col = 1, type = "p", pch = 20, lty = 1, lwd = 1, ...) 
{
    col <- rep(as.list(col), length = nlevels(groups))
    type <- rep(as.list(type), length = nlevels(groups))
    pch <- rep(as.list(pch), length = nlevels(groups))
    lty <- rep(as.list(lty), length = nlevels(groups))
    lwd <- rep(as.list(lwd), length = nlevels(groups))
    for(g in 1:nlevels(groups)) {
        idx <- g == groups[subscripts]
        if (any(idx)) panel(x[idx], y[idx], ..., col = col[[g]],
            type = type[[g]], pch = pch[[g]], lty = lty[[g]], lwd = lwd[[g]])
        grid::grid.lines(y = grid::unit(0, "native"),
            gp = grid::gpar(col = "gray"))
        grid::grid.lines(y = grid::unit(jour_critval, "native"),
            gp = grid::gpar(col = 2))
    }
}
print(xyplot(abs(jour_process), panel = mypanel, xlab = "Time",
    type = "b", ylim = c(0, 6), as.table = TRUE))

### fit node with h(Y) = score and g(X) = maxstat_trafo
ytrf <- function(data) {
    ret <- estfun(lm(data[[1]] ~ data[[2]]))
    attr(ret, "assign") <- 1:2
    ret
}
xtrf <- function(data)
    trafo(data, numeric_trafo = maxstat_trafo)
mynode <- function(data) {
    vars <- c("society", "pages", "charpp", "age")
    sapply(vars, function(v) {
        f <- as.formula(paste("log(subs) + log(price/citations) ~", v))
        it <- independence_test(f, data = data, ytrafo = ytrf,
            xtrafo = xtrf, distribution = approximate(9999))
        c(statistic(it), 1 - (1 - pvalue(it))^length(vars))
    })
}

## fit all tree elements
jour_node <- factor(Journals$age <= 18, levels = c(TRUE, FALSE),
    labels = c("Node 2", "Node 3"))
jour_lm2 <- lm(log(subs) ~ log(price/citations),
    data = Journals[jour_node == "Node 2",])
jour_lm3 <- lm(log(subs) ~ log(price/citations),
    data = Journals[jour_node == "Node 3",])

## conduct tests in all leaves
set.seed(rseed)
jour_tree <- list(
    mynode(Journals),
    mynode(Journals[jour_node == "Node 2",]),
    mynode(data = Journals[jour_node == "Node 3",])
)

## fitted models
plot(log(subs) ~ log(price/citations), data = Journals,
    xlab = "log(price/citations)", ylab = "log(subscriptions)",
    pch = c(24, 21)[jour_node], bg = hcl(c(0, 240), 50, 70)[jour_node])
abline(coef(jour_lm2), col = hcl(0, 80, 30), lty = 5, lwd = 1.7)
abline(coef(jour_lm3), col = hcl(240, 80, 30), lty = 1, lwd = 1.7)
legend("bottomleft", c(expression(age > 18), expression(age <= 18)),
    pch = c(19, 17), lty = c(1, 5), col = hcl(c(240, 0), 80, 30), bty = "n")

party/demo/00Index
strucchange-perm    Reproduce Zeileis and Hothorn, DOI:10.1007/s00362-013-0503-4

party/data/readingSkills.rda
[binary R data file: the readingSkills data set documented in man/readingSkills.Rd; raw contents not reproducible as text]

party/man/cforest_control.Rd
\name{Control Forest Hyper Parameters}
\alias{cforest_control}
\alias{cforest_classical}
\alias{cforest_unbiased}
\title{ Control for Conditional Tree Forests }
\description{
  Various parameters that control aspects of the `cforest' fit via
  its `control' argument.
}
\usage{
cforest_unbiased(\dots)
cforest_classical(\dots)
cforest_control(teststat = "max",
    testtype = "Teststatistic",
    mincriterion = qnorm(0.9),
    savesplitstats = FALSE,
    ntree = 500, mtry = 5, replace = TRUE,
    fraction = 0.632, trace = FALSE, \dots)
}
\arguments{
  \item{teststat}{ a character specifying the type of the test statistic
      to be applied. }
  \item{testtype}{ a character specifying how to compute the distribution
      of the test statistic. }
  \item{mincriterion}{ the value of the test statistic (for
      \code{testtype == "Teststatistic"}), or 1 - p-value (for other
      values of \code{testtype}) that must be exceeded in order to
      implement a split. }
  \item{mtry}{ number of input variables randomly sampled as candidates at
      each node for random forest like algorithms. Bagging, as a special
      case of a random forest without random input variable sampling, can
      be performed by setting \code{mtry} either equal to \code{NULL} or
      manually equal to the number of input variables.}
  \item{savesplitstats}{ a logical determining whether the process of
      standardized two-sample statistics for the split point estimate is
      saved for each primary split.}
  \item{ntree}{ number of trees to grow in a forest.}
  \item{replace}{ a logical indicating whether sampling of observations is
      done with or without replacement.}
  \item{fraction}{ fraction of number of observations to draw without
      replacement (only relevant if \code{replace = FALSE}).}
  \item{trace}{ a logical indicating if a progress bar shall be printed
      while the forest grows.}
  \item{\dots}{ additional arguments to be passed to
      \code{\link{ctree_control}}.}
}
\details{
  All three functions return an object of class
  \code{\link{ForestControl-class}} defining hyper parameters to be
  specified via the \code{control} argument of \code{\link{cforest}}.
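  For illustration only, a minimal sketch of passing such a control object
  to \code{\link{cforest}} (data set and hyper parameter values chosen
  arbitrarily, not as a recommendation):
  \preformatted{
cf <- cforest(Species ~ ., data = iris,
              controls = cforest_unbiased(ntree = 100, mtry = 2))
  }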
  The arguments \code{teststat}, \code{testtype} and \code{mincriterion}
  determine how the global null hypothesis of independence between all
  input variables and the response is tested (see \code{\link{ctree}}).
  The argument \code{nresample} is the number of Monte-Carlo replications
  to be used when \code{testtype = "MonteCarlo"}.

  A split is established when the sum of the weights in both daughter
  nodes is larger than \code{minsplit}; this avoids pathological splits at
  the borders. When \code{stump = TRUE}, a tree with at most two terminal
  nodes is computed.

  The \code{mtry} argument regulates a random selection of \code{mtry}
  input variables in each node. Note that here \code{mtry} is fixed to the
  value 5 by default for merely technical reasons, while in
  \code{\link[randomForest]{randomForest}} the default values for
  classification and regression vary with the number of input variables.
  Make sure that \code{mtry} is defined properly before using
  \code{cforest}.

  It might be informative to look at scatterplots of input variables
  against the standardized two-sample split statistics; those are
  available when \code{savesplitstats = TRUE}. Each node is then
  associated with a vector whose length is determined by the number of
  observations in the learning sample, and thus much more memory is
  required.

  The number of trees \code{ntree} can be increased for large numbers of
  input variables.

  Function \code{cforest_unbiased} returns the settings suggested for the
  construction of unbiased random forests (\code{teststat = "quad",
  testtype = "Univ", replace = FALSE}) by Strobl et al. (2007) and is the
  default since version 0.9-90. Hyper parameter settings mimicking the
  behaviour of \code{\link[randomForest]{randomForest}} are available in
  \code{cforest_classical}; these were used as the default up to version
  0.9-14.

  Please note that \code{\link{cforest}}, in contrast to
  \code{\link[randomForest]{randomForest}}, doesn't grow trees of maximal
  depth. To grow large trees, set \code{mincriterion = 0}.
}
\value{
  An object of class \code{\link{ForestControl-class}}.
}
\references{
    Carolin Strobl, Anne-Laure Boulesteix, Achim Zeileis and Torsten
    Hothorn (2007). Bias in Random Forest Variable Importance Measures:
    Illustrations, Sources and a Solution. \emph{BMC Bioinformatics},
    \bold{8}, 25. DOI: 10.1186/1471-2105-8-25
}
\keyword{misc}

party/man/mob.Rd
\name{mob}
\encoding{latin1}
\alias{mob}
\alias{mob-class}
\alias{coef.mob}
\alias{deviance.mob}
\alias{fitted.mob}
\alias{logLik.mob}
\alias{predict.mob}
\alias{print.mob}
\alias{residuals.mob}
\alias{sctest.mob}
\alias{summary.mob}
\alias{weights.mob}
\title{Model-based Recursive Partitioning}
\description{
  MOB is an algorithm for model-based recursive partitioning yielding a
  tree with fitted models associated with each terminal node.
}
\usage{
mob(formula, weights, data = list(), na.action = na.omit,
  model = glinearModel, control = mob_control(), \dots)
\method{predict}{mob}(object, newdata = NULL, type = c("response", "node"), \dots)
\method{summary}{mob}(object, node = NULL, \dots)
\method{coef}{mob}(object, node = NULL, \dots)
\method{sctest}{mob}(x, node = NULL, \dots)
}
\arguments{
  \item{formula}{A symbolic description of the model to be fit.
This should be of type \code{y ~ x1 + \dots + xk | z1 + \dots + zl} where the variables before the \code{|} are passed to the \code{model} and the variables after the \code{|} are used for partitioning.} \item{weights}{An optional vector of weights to be used in the fitting process. Only non-negative integer valued weights are allowed (default = 1).} \item{data}{A data frame containing the variables in the model.} \item{na.action}{A function which indicates what should happen when the data contain \code{NA}s, defaulting to \code{\link{na.omit}}.} \item{model}{A model of class \code{"\linkS4class{StatModel}"}. See details for requirements.} \item{control}{A list with control parameters as returned by \code{\link{mob_control}}.} \item{\dots}{Additional arguments passed to the \code{fit} call for the \code{model}.} \item{object, x}{A fitted \code{mob} object.} \item{newdata}{A data frame with new inputs, by default the learning data is used.} \item{type}{A character string specifying whether the response should be predicted (inherited from the \code{predict} method for the \code{model}) or the ID of the associated terminal node.} \item{node}{A vector of node IDs for which the corresponding method should be applied.} } \details{ Model-based partitioning fits a model tree using the following algorithm: \enumerate{ \item \code{fit} a \code{model} (default: a generalized linear model \code{"\linkS4class{StatModel}"} with formula \code{y ~ x1 + \dots + xk} for the observations in the current node. \item Assess the stability of the model parameters with respect to each of the partitioning variables \code{z1}, \dots, \code{zl}. If there is some overall instability, choose the variable \code{z} associated with the smallest \eqn{p} value for partitioning, otherwise stop. For performing the parameter instability fluctuation test, a \code{\link[sandwich]{estfun}} method and a \code{\link{weights}} method is needed. \item Search for the locally optimal split in \code{z} by minimizing the objective function of the \code{model}. Typically, this will be something like \code{\link{deviance}} or the negative \code{\link{logLik}} and can be specified in \code{\link{mob_control}}. \item Re-fit the \code{model} in both children, using \code{\link{reweight}} and repeat from step 2. } More details on the conceptual design of the algorithm can be found in Zeileis, Hothorn, Hornik (2008) and some illustrations are provided in \code{vignette("MOB")}. For the fitted MOB tree, several standard methods are inherited if they are available for fitted \code{model}s, such as \code{print}, \code{predict}, \code{residuals}, \code{logLik}, \code{deviance}, \code{weights}, \code{coef} and \code{summary}. By default, the latter four return the result (deviance, weights, coefficients, summary) for all terminal nodes, but take a \code{node} argument that can be set to any node ID. The \code{sctest} method extracts the results of the parameter stability tests (aka structural change tests) for any given node, by default for all nodes. Some examples are given below. } \value{ An object of class \code{mob} inheriting from \code{\link{BinaryTree-class}}. Every node of the tree is additionally associated with a fitted model. } \references{ Achim Zeileis, Torsten Hothorn, and Kurt Hornik (2008). Model-Based Recursive Partitioning. \emph{Journal of Computational and Graphical Statistics}, \bold{17}(2), 492--514. 
} \seealso{\code{\link{plot.mob}}, \code{\link{mob_control}}} \examples{ set.seed(290875) if(require("mlbench")) { ## recursive partitioning of a linear regression model ## load data data("BostonHousing", package = "mlbench") ## and transform variables appropriately (for a linear regression) BostonHousing$lstat <- log(BostonHousing$lstat) BostonHousing$rm <- BostonHousing$rm^2 ## as well as partitioning variables (for fluctuation testing) BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1, labels = c("no", "yes")) BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE) ## partition the linear regression model medv ~ lstat + rm ## with respect to all remaining variables: fmBH <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age + dis + rad + tax + crim + b + ptratio, control = mob_control(minsplit = 40), data = BostonHousing, model = linearModel) ## print the resulting tree fmBH ## or better visualize it plot(fmBH) ## extract coefficients in all terminal nodes coef(fmBH) ## look at full summary, e.g., for node 7 summary(fmBH, node = 7) ## results of parameter stability tests for that node sctest(fmBH, node = 7) ## -> no further significant instabilities (at 5\% level) ## compute mean squared error (on training data) mean((BostonHousing$medv - fitted(fmBH))^2) mean(residuals(fmBH)^2) deviance(fmBH)/sum(weights(fmBH)) ## evaluate logLik and AIC logLik(fmBH) AIC(fmBH) ## (Note that this penalizes estimation of error variances, which ## were treated as nuisance parameters in the fitting process.) ## recursive partitioning of a logistic regression model ## load data data("PimaIndiansDiabetes", package = "mlbench") ## partition logistic regression diabetes ~ glucose ## wth respect to all remaining variables fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps + insulin + mass + pedigree + age, data = PimaIndiansDiabetes, model = glinearModel, family = binomial()) ## fitted model coef(fmPID) plot(fmPID) plot(fmPID, tp_args = list(cdplot = TRUE)) } } \keyword{tree} party/man/prettytree.Rd0000644000176200001440000000075512444054436014677 0ustar liggesusers\name{prettytree} \alias{prettytree} \title{ Print a tree. } \description{ Produces textual output representing a tree. } \usage{ prettytree(x, inames = NULL, ilevels = NULL) } \arguments{ \item{x}{ a recursive list representing a tree. } \item{inames}{ optional variable names. } \item{ilevels}{ an optional list of levels for factors. } } \details{ This function is normally not called by users but needed in some reverse dependencies of party. 
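  A minimal sketch of the kind of call such packages make (object and
  variable names are illustrative; the same pattern appears in the
  examples of \code{\link{cforest}}):
  \preformatted{
cf <- cforest(Species ~ ., data = iris,
              controls = cforest_unbiased(ntree = 10, mtry = 2))
prettytree(cf@ensemble[[1]], names(cf@data@get("input")))
  }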
}
\keyword{tree}

party/man/Transformations.Rd
\name{Transformations}
\alias{ptrafo}
\alias{ff_trafo}
\title{ Function for Data Transformations }
\description{
    Transformations of Response or Input Variables
}
\usage{
ptrafo(data, numeric_trafo = id_trafo, factor_trafo = ff_trafo,
    ordered_trafo = of_trafo, surv_trafo = logrank_trafo,
    var_trafo = NULL)
ff_trafo(x)
}
\arguments{
  \item{data}{an object of class \code{data.frame}.}
  \item{numeric_trafo}{a function to be applied to \code{numeric} elements
    of \code{data} returning a matrix with \code{nrow(data)} rows and an
    arbitrary number of columns.}
  \item{ordered_trafo}{a function to be applied to \code{ordered} elements
    of \code{data} returning a matrix with \code{nrow(data)} rows and an
    arbitrary number of columns (usually some scores).}
  \item{factor_trafo}{a function to be applied to \code{factor} elements
    of \code{data} returning a matrix with \code{nrow(data)} rows and an
    arbitrary number of columns (usually a dummy or contrast matrix).}
  \item{surv_trafo}{a function to be applied to elements of class
    \code{Surv} of \code{data} returning a matrix with \code{nrow(data)}
    rows and an arbitrary number of columns.}
  \item{var_trafo}{an optional named list of functions to be applied to
    the corresponding variables in \code{data}.}
  \item{x}{a factor.}
}
\details{
  \code{ptrafo} applies its arguments to the elements of \code{data}
  according to the classes of the elements. See
  \code{\link[coin]{Transformations}} for more documentation and examples.

  In the presence of missing values, one needs to make sure that all
  user-supplied functions deal with them.
}
\value{
  A named matrix with \code{nrow(data)} rows and an arbitrary number of
  columns.
}
\examples{
### rank a variable
ptrafo(data.frame(y = 1:20),
       numeric_trafo = function(x) rank(x, na.last = "keep"))
### dummy coding of a factor
ptrafo(data.frame(y = gl(3, 9)))
}
\keyword{manip}

party/man/TreeControl-class.Rd
\name{TreeControl Class}
\docType{class}
\alias{TreeControl-class}
\alias{TreeControl}
\title{Class "TreeControl"}
\description{
    Objects of this class represent the hyper parameter setting for tree
    growing.
}
\section{Objects from the Class}{
    Objects can be created by \code{\link{ctree_control}}.
}
\section{Slots}{
  \describe{
    \item{\code{varctrl}:}{Object of class \code{"VariableControl"}.}
    \item{\code{splitctrl}:}{Object of class \code{"SplitControl"}.}
    \item{\code{gtctrl}:}{Object of class \code{"GlobalTestControl"}.}
    \item{\code{tgctrl}:}{Object of class \code{"TreeGrowControl"}.}
  }
}
\section{Methods}{
  No methods defined with class "TreeControl" in the signature.
}
\keyword{classes}

party/man/reweight.Rd
\name{reweight}
\alias{reweight}
\alias{reweight.linearModel}
\alias{reweight.glinearModel}
\title{Re-fitting Models with New Weights}
\description{
  Generic function for re-fitting a model object using the same
  observations but different weights.
}
\usage{
reweight(object, weights, \dots)
}
\arguments{
  \item{object}{a fitted model object.}
  \item{weights}{a vector of weights.}
  \item{\dots}{arguments passed to methods.}
}
\details{
  The method is similar in spirit to \code{\link[stats]{update}}, but much
  more narrowly focused. It should return an updated fitted model derived
  from re-fitting the model on the same observations but using different
  weights.
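  A purely hypothetical method for a new model class might simply re-call
  the fitting function (the class name \code{myModel} is illustrative, and
  it is assumed that the model's fit function accepts a \code{weights}
  argument and that the fitted object carries its \code{ModelEnv}):
  \preformatted{
reweight.myModel <- function(object, weights, ...)
    fit(myModel, object$ModelEnv, weights = weights, ...)
  }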
} \value{The re-weighted fitted model object.} \seealso{\code{\link{update}}} \examples{ ## fit cars regression mf <- dpp(linearModel, dist ~ speed, data = cars) fm <- fit(linearModel, mf) fm ## re-fit, excluding the last 4 observations ww <- c(rep(1, 46), rep(0, 4)) reweight(fm, ww) } \keyword{regression} party/man/SplittingNode-class.Rd0000644000176200001440000000063411674356046016360 0ustar liggesusers\name{SplittingNode Class} \docType{class} \alias{SplittingNode-class} \alias{TerminalNode-class} \alias{TerminalModelNode-class} \title{Class "SplittingNode"} \description{ A list representing the inner node of a binary tree. } \section{Extends}{ Class \code{"list"}, from data part. Class \code{"vector"}, by class \code{"list"}. See \code{\link{BinaryTree-class}} for more details. } \keyword{classes} party/man/plot.BinaryTree.Rd0000644000176200001440000001153414010204125015465 0ustar liggesusers\name{Plot BinaryTree} \encoding{latin1} \alias{plot.BinaryTree} \title{ Visualization of Binary Regression Trees } \description{ \code{plot} method for \code{BinaryTree} objects with extended facilities for plugging in panel functions. } \usage{ \method{plot}{BinaryTree}(x, main = NULL, type = c("extended", "simple"), terminal_panel = NULL, tp_args = list(), inner_panel = node_inner, ip_args = list(), edge_panel = edge_simple, ep_args = list(), drop_terminal = (type[1] == "extended"), tnex = (type[1] == "extended") + 1, newpage = TRUE, pop = TRUE, \dots) } \arguments{ \item{x}{ an object of class \code{BinaryTree}.} \item{main}{ an optional title for the plot.} \item{type}{ a character specifying the complexity of the plot: \code{extended} tries to visualize the distribution of the response variable in each terminal node whereas \code{simple} only gives some summary information.} \item{terminal_panel}{ an optional panel function of the form \code{function(node)} plotting the terminal nodes. Alternatively, a panel generating function of class \code{"grapcon_generator"} that is called with arguments \code{x} and \code{tp_args} to set up a panel function. By default, an appropriate panel function is chosen depending on the scale of the dependent variable.} \item{tp_args}{ a list of arguments passed to \code{terminal_panel} if this is a \code{"grapcon_generator"} object.} \item{inner_panel}{ an optional panel function of the form \code{function(node)} plotting the inner nodes. Alternatively, a panel generating function of class \code{"grapcon_generator"} that is called with arguments \code{x} and \code{ip_args} to set up a panel function.} \item{ip_args}{ a list of arguments passed to \code{inner_panel} if this is a \code{"grapcon_generator"} object.} \item{edge_panel}{ an optional panel function of the form \code{function(split, ordered = FALSE, left = TRUE)} plotting the edges. Alternatively, a panel generating function of class \code{"grapcon_generator"} that is called with arguments \code{x} and \code{ip_args} to set up a panel function.} \item{ep_args}{ a list of arguments passed to \code{edge_panel} if this is a \code{"grapcon_generator"} object.} \item{drop_terminal}{ a logical indicating whether all terminal nodes should be plotted at the bottom.} \item{tnex}{a numeric value giving the terminal node extension in relation to the inner nodes.} \item{newpage}{ a logical indicating whether \code{grid.newpage()} should be called. } \item{pop}{ a logical whether the viewport tree should be popped before return. 
}
  \item{\dots}{ additional arguments passed to callees.}
}
\details{
  This \code{plot} method for \code{BinaryTree} objects provides an
  extensible framework for the visualization of binary regression trees.
  The user is allowed to specify panel functions for plotting terminal and
  inner nodes as well as the corresponding edges. Panel functions for
  plotting inner nodes, edges and terminal nodes are available for the
  most important cases and can serve as the basis for user-supplied
  extensions; see \code{\link{node_inner}} and \code{vignette("party")}.

  More details on the ideas and concepts of panel-generating functions and
  \code{"grapcon_generator"} objects in general can be found in Meyer,
  Zeileis and Hornik (2006).
}
\references{
   David Meyer, Achim Zeileis, and Kurt Hornik (2006). The Strucplot
   Framework: Visualizing Multi-Way Contingency Tables with vcd.
   \emph{Journal of Statistical Software}, \bold{17}(3).
   \doi{10.18637/jss.v017.i03}
}
\seealso{\code{\link{node_inner}}, \code{\link{node_terminal}},
   \code{\link{edge_simple}}, \code{\link{node_surv}},
   \code{\link{node_barplot}}, \code{\link{node_boxplot}},
   \code{\link{node_hist}}, \code{\link{node_density}}}
\examples{
set.seed(290875)

airq <- subset(airquality, !is.na(Ozone))
airct <- ctree(Ozone ~ ., data = airq)

### regression: boxplots in each node
plot(airct, terminal_panel = node_boxplot, drop_terminal = TRUE)

if(require("TH.data")) {
    ## classification: barplots in each node
    data("GlaucomaM", package = "TH.data")
    glauct <- ctree(Class ~ ., data = GlaucomaM)
    plot(glauct)
    plot(glauct, inner_panel = node_barplot,
         edge_panel = function(ctreeobj, ...) { function(...) invisible() },
         tnex = 1)

    ## survival: Kaplan-Meier curves in each node
    data("GBSG2", package = "TH.data")
    library("survival")
    gbsg2ct <- ctree(Surv(time, cens) ~ ., data = GBSG2)
    plot(gbsg2ct)
    plot(gbsg2ct, type = "simple")
}
}
\keyword{hplot}

party/man/mob_control.Rd
\name{mob_control}
\alias{mob_control}
\title{Control Parameters for Model-based Partitioning}
\description{
  Various parameters that control aspects of the fitting algorithm for
  recursively partitioned \code{\link{mob}} models.
}
\usage{
mob_control(alpha = 0.05, bonferroni = TRUE, minsplit = 20, trim = 0.1,
  objfun = deviance, breakties = FALSE, parm = NULL, verbose = FALSE)
}
\arguments{
  \item{alpha}{numeric significance level. A node is split when the
    (possibly Bonferroni-corrected) \eqn{p} value for any parameter
    stability test in that node falls below \code{alpha}.}
  \item{bonferroni}{logical. Should \eqn{p} values be Bonferroni
    corrected?}
  \item{minsplit}{integer. The minimum number of observations (sum of the
    weights) in a node.}
  \item{trim}{numeric. This specifies the trimming in the parameter
    instability test for the numerical variables. If smaller than 1, it is
    interpreted as the fraction relative to the current node size.}
  \item{objfun}{function. A function for extracting the minimized value of
    the objective function from a fitted model in a node.}
  \item{breakties}{logical. Should ties in numeric variables be broken
    randomly for computing the associated parameter instability test?}
  \item{parm}{numeric or character. Number or name of model parameters
    included in the parameter instability tests (by default all parameters
    are included).}
  \item{verbose}{logical. Should information about the fitting process of
    \code{\link{mob}} (such as test statistics, \eqn{p} values, selected
    splitting variables and split points) be printed to the screen?}
}
\details{
  See \code{\link{mob}} for more details and references.
}
\seealso{\code{\link{mob}}}
\value{
  A list of class \code{mob_control} containing the control parameters.
}
\keyword{misc}

party/man/LearningSample-class.Rd
\name{LearningSample Class}
\docType{class}
\alias{LearningSample-class}
\title{Class "LearningSample"}
\description{
  Objects of this class represent data for fitting tree-based models.
}
\section{Objects from the Class}{
  Objects can be created by calls of the form
  \code{new("LearningSample", ...)}.
}
\section{Slots}{
  \describe{
    \item{\code{responses}:}{Object of class \code{"VariableFrame"} with
      the response variables.}
    \item{\code{inputs}:}{Object of class \code{"VariableFrame"} with the
      input variables.}
    \item{\code{weights}:}{Object of class \code{"numeric"}, a vector of
      case counts or weights.}
    \item{\code{nobs}:}{Object of class \code{"integer"}, the number of
      observations.}
    \item{\code{ninputs}:}{Object of class \code{"integer"}, the number of
      input variables.}
  }
}
\section{Methods}{
  No methods defined with class "LearningSample" in the signature.
}
\keyword{classes}

party/man/initVariableFrame-methods.Rd
\name{initVariableFrame-methods}
\docType{methods}
\alias{initVariableFrame}
\alias{initVariableFrame-methods}
\alias{initVariableFrame,data.frame-method}
\alias{initVariableFrame,matrix-method}
\title{Set-up VariableFrame objects}
\description{
  Set-up VariableFrame objects
}
\section{Methods}{
  These methods are not to be called by the user.
  \describe{
    \item{\code{signature(obj = "data.frame")}}{ converts a data frame to
      a VariableFrame }
    \item{\code{signature(obj = "matrix")}}{ converts a matrix to a
      VariableFrame }
  }
}
\keyword{methods}

party/man/plot.mob.Rd
\name{plot.mob}
\alias{plot.mob}
\title{ Visualization of MOB Trees }
\description{
  \code{plot} method for \code{mob} objects with extended facilities for
  plugging in panel functions.
}
\usage{
\method{plot}{mob}(x, terminal_panel = node_bivplot, tnex = NULL, \dots)
}
\arguments{
  \item{x}{an object of class \code{mob}.}
  \item{terminal_panel}{a panel function or panel-generating function of
    class \code{"grapcon_generator"}. See \code{\link{plot.BinaryTree}}
    for more details.}
  \item{tnex}{a numeric value giving the terminal node extension in
    relation to the inner nodes.}
  \item{\dots}{ further arguments passed to
    \code{\link{plot.BinaryTree}}.}
}
\details{
  This \code{plot} method for \code{mob} objects simply calls the
  \code{\link{plot.BinaryTree}} method, setting a different
  \code{terminal_panel} function by default (\code{\link{node_bivplot}})
  and \code{tnex} value.
} \seealso{\code{\link{node_bivplot}}, \code{\link{node_scatterplot}}, \code{\link{plot.BinaryTree}}, \code{\link{mob}}} \examples{ set.seed(290875) if(require("mlbench")) { ## recursive partitioning of a linear regression model ## load data data("BostonHousing", package = "mlbench") ## and transform variables appropriately (for a linear regression) BostonHousing$lstat <- log(BostonHousing$lstat) BostonHousing$rm <- BostonHousing$rm^2 ## as well as partitioning variables (for fluctuation testing) BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1, labels = c("no", "yes")) BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE) ## partition the linear regression model medv ~ lstat + rm ## with respect to all remaining variables: fm <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age + dis + rad + tax + crim + b + ptratio, control = mob_control(minsplit = 40), data = BostonHousing, model = linearModel) ## visualize medv ~ lstat and medv ~ rm plot(fm) ## visualize only one of the two regressors plot(fm, tp_args = list(which = "lstat"), tnex = 2) plot(fm, tp_args = list(which = 2), tnex = 2) ## omit fitted mean lines plot(fm, tp_args = list(fitmean = FALSE)) ## mixed numerical and categorical regressors fm2 <- mob(medv ~ lstat + rm + chas | zn + indus + nox + age + dis + rad, control = mob_control(minsplit = 100), data = BostonHousing, model = linearModel) plot(fm2) ## recursive partitioning of a logistic regression model data("PimaIndiansDiabetes", package = "mlbench") fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps + insulin + mass + pedigree + age, data = PimaIndiansDiabetes, model = glinearModel, family = binomial()) ## default plot: spinograms with breaks from five point summary plot(fmPID) ## use the breaks from hist() instead plot(fmPID, tp_args = list(fivenum = FALSE)) ## user-defined breaks plot(fmPID, tp_args = list(breaks = 0:4 * 50)) ## CD plots instead of spinograms plot(fmPID, tp_args = list(cdplot = TRUE)) ## different smoothing bandwidth plot(fmPID, tp_args = list(cdplot = TRUE, bw = 15)) } } \keyword{hplot} party/man/panelfunctions.Rd0000644000176200001440000001721214010204107015473 0ustar liggesusers\name{Panel Generating Functions} \encoding{latin1} \alias{node_inner} \alias{node_terminal} \alias{edge_simple} \alias{node_surv} \alias{node_barplot} \alias{node_boxplot} \alias{node_hist} \alias{node_density} \alias{node_scatterplot} \alias{node_bivplot} \title{ Panel-Generators for Visualization of Party Trees } \description{ The plot method for \code{BinaryTree} and \code{mob} objects are rather flexible and can be extended by panel functions. Some pre-defined panel-generating functions of class \code{grapcon_generator} for the most important cases are documented here. 
} \usage{ node_inner(ctreeobj, digits = 3, abbreviate = FALSE, fill = "white", pval = TRUE, id = TRUE) node_terminal(ctreeobj, digits = 3, abbreviate = FALSE, fill = c("lightgray", "white"), id = TRUE) edge_simple(treeobj, digits = 3, abbreviate = FALSE) node_surv(ctreeobj, ylines = 2, id = TRUE, \dots) node_barplot(ctreeobj, col = "black", fill = NULL, beside = NULL, ymax = NULL, ylines = NULL, widths = 1, gap = NULL, reverse = NULL, id = TRUE) node_boxplot(ctreeobj, col = "black", fill = "lightgray", width = 0.5, yscale = NULL, ylines = 3, cex = 0.5, id = TRUE) node_hist(ctreeobj, col = "black", fill = "lightgray", freq = FALSE, horizontal = TRUE, xscale = NULL, ymax = NULL, ylines = 3, id = TRUE, \dots) node_density(ctreeobj, col = "black", rug = TRUE, horizontal = TRUE, xscale = NULL, yscale = NULL, ylines = 3, id = TRUE) node_scatterplot(mobobj, which = NULL, col = "black", linecol = "red", cex = 0.5, pch = NULL, jitter = FALSE, xscale = NULL, yscale = NULL, ylines = 1.5, id = TRUE, labels = FALSE) node_bivplot(mobobj, which = NULL, id = TRUE, pop = TRUE, pointcol = "black", pointcex = 0.5, boxcol = "black", boxwidth = 0.5, boxfill = "lightgray", fitmean = TRUE, linecol = "red", cdplot = FALSE, fivenum = TRUE, breaks = NULL, ylines = NULL, xlab = FALSE, ylab = FALSE, margins = rep(1.5, 4), \dots) } \arguments{ \item{ctreeobj}{ an object of class \code{BinaryTree}.} \item{treeobj}{ an object of class \code{BinaryTree} or \code{mob}.} \item{mobobj}{ an object of class \code{mob}.} \item{digits}{ integer, used for formating numbers. } \item{abbreviate}{ logical indicating whether strings should be abbreviated. } \item{col, pointcol}{ a color for points and lines. } \item{fill}{ a color to filling rectangles. } \item{pval}{ logical. Should p values be plotted?} \item{id}{ logical. Should node IDs be plotted?} \item{ylines}{ number of lines for spaces in y-direction. } \item{widths}{ widths in barplots. } \item{width, boxwidth}{ width in boxplots. } \item{gap}{ gap between bars in a barplot (\code{node_barplot}). } \item{yscale}{ limits in y-direction} \item{xscale}{ limits in x-direction} \item{ymax}{ upper limit in y-direction} \item{beside}{ logical indicating if barplots should be side by side or stacked. } \item{reverse}{logical indicating whether the order of levels should be reversed for barplots.} \item{horizontal}{ logical indicating if the plots should be horizontal. } \item{freq}{logical; if \code{TRUE}, the histogram graphic is a representation of frequencies. If \code{FALSE}, probabilities are plotted.} \item{rug}{logical indicating if a rug representation should be added. } \item{which}{ numeric or character vector indicating which of the regressor variables should be plotted (default = all).} \item{linecol}{ color for fitted model lines.} \item{cex, pointcex}{character extension of points in scatter plots.} \item{pch}{plotting character of points in scatter plots.} \item{jitter}{logical. Should the points be jittered in y-direction?} \item{labels}{logical. Should axis labels be plotted?} \item{pop}{logical. Should the panel viewports be popped?} \item{boxcol}{color for box plot borders.} \item{boxfill}{fill color for box plots.} \item{fitmean}{logical. Should lines for the predicted means from the model be added?} \item{cdplot}{logical. Should CD plots (or spinograms) be used for visualizing the dependence of a categorical on a numeric variable?} \item{fivenum}{logical. 
    When using spinograms, should the five point summary of the
    explanatory variable be used for determining the breaks?}
  \item{breaks}{a (list of) numeric vector(s) of breaks for the
    spinograms. If set to \code{NULL} (the default), the \code{breaks} are
    chosen according to the \code{fivenum} argument.}
  \item{xlab, ylab}{character with x- and y-axis labels. Can also be
    logical: if \code{FALSE}, axis labels are suppressed; if \code{TRUE},
    they are taken from the underlying data. Can be a vector of labels for
    \code{xlab}.}
  \item{margins}{margins of the viewports.}
  \item{\dots}{ additional arguments passed to callees.}
}
\details{
  The \code{plot} methods for \code{BinaryTree} and \code{mob} objects
  provide an extensible framework for the visualization of binary
  regression trees. The user is allowed to specify panel functions for
  plotting terminal and inner nodes as well as the corresponding edges.

  The panel functions to be used should depend only on the node being
  visualized; however, for setting up an appropriate panel function,
  information from the whole tree is typically required. Hence,
  \pkg{party} adopts the framework of \code{grapcon_generator} (graphical
  appearance control) from the \pkg{vcd} package (Meyer, Zeileis and
  Hornik, 2006) and provides several panel-generating functions.

  For convenience, the panel-generating functions \code{node_inner} and
  \code{edge_simple} return panel functions to draw inner nodes and left
  and right edges. For drawing terminal nodes, the functions returned by
  the other panel functions can be used. The panel-generating function
  \code{node_terminal} provides a terse, text-based representation of
  terminal nodes.

  Graphical representations of terminal nodes are available and depend on
  the kind of model and the measurement scale of the variables modelled.
  For univariate regressions (typically fitted by \code{ctree}),
  \code{node_surv} returns a function that plots Kaplan-Meier curves in
  each terminal node; \code{node_barplot}, \code{node_boxplot},
  \code{node_hist} and \code{node_density} can be used to plot bar plots,
  box plots, histograms and estimated densities into the terminal nodes.

  For multivariate regressions (typically fitted by \code{mob}),
  \code{node_bivplot} returns a panel function that creates bivariate
  plots of the response against all regressors in the model. Depending on
  the scale of the variables involved, scatter plots, box plots,
  spinograms (or CD plots) and spine plots are created. For the latter
  two, \code{\link[vcd]{spine}} and \code{\link[vcd]{cd_plot}} from the
  \pkg{vcd} package are re-used.
}
\references{
   David Meyer, Achim Zeileis, and Kurt Hornik (2006). The Strucplot
   Framework: Visualizing Multi-Way Contingency Tables with vcd.
   \emph{Journal of Statistical Software}, \bold{17}(3).
   \doi{10.18637/jss.v017.i03}
}
\examples{
set.seed(290875)

airq <- subset(airquality, !is.na(Ozone))
airct <- ctree(Ozone ~ ., data = airq)

## default: boxplots
plot(airct)

## change colors
plot(airct, tp_args = list(col = "blue", fill = hsv(2/3, 0.5, 1)))
## equivalent to
plot(airct, terminal_panel = node_boxplot(airct, col = "blue",
                                          fill = hsv(2/3, 0.5, 1)))

### very simple; the mean is given in each terminal node
plot(airct, type = "simple")

### density estimates
plot(airct, terminal_panel = node_density)

### histograms
plot(airct, terminal_panel = node_hist(airct, ymax = 0.06,
                                       xscale = c(0, 250)))
}
\keyword{hplot}

party/man/ctree_control.Rd
\name{Control ctree Hyper Parameters}
\alias{ctree_control}
\title{ Control for Conditional Inference Trees }
\description{
  Various parameters that control aspects of the `ctree' fit.
}
\usage{
ctree_control(teststat = c("quad", "max"),
    testtype = c("Bonferroni", "MonteCarlo",
                 "Univariate", "Teststatistic"),
    mincriterion = 0.95, minsplit = 20, minbucket = 7,
    stump = FALSE, nresample = 9999, maxsurrogate = 0,
    mtry = 0, savesplitstats = TRUE, maxdepth = 0,
    remove_weights = FALSE)
}
\arguments{
  \item{teststat}{ a character specifying the type of the test statistic
      to be applied. }
  \item{testtype}{ a character specifying how to compute the distribution
      of the test statistic. }
  \item{mincriterion}{ the value of the test statistic (for
      \code{testtype == "Teststatistic"}), or 1 - p-value (for other
      values of \code{testtype}) that must be exceeded in order to
      implement a split. }
  \item{minsplit}{ the minimum sum of weights in a node in order to be
      considered for splitting. }
  \item{minbucket}{ the minimum sum of weights in a terminal node. }
  \item{stump}{ a logical determining whether a stump (a tree with three
      nodes only) is to be computed. }
  \item{nresample}{ number of Monte-Carlo replications to use when the
      distribution of the test statistic is simulated.}
  \item{maxsurrogate}{ number of surrogate splits to evaluate. Note that
      currently only surrogate splits in ordered covariables are
      implemented. }
  \item{mtry}{ number of input variables randomly sampled as candidates
      at each node for random forest like algorithms. The default
      \code{mtry = 0} means that no random selection takes place.}
  \item{savesplitstats}{ a logical determining if the process of
      standardized two-sample statistics for the split point estimate is
      saved for each primary split.}
  \item{maxdepth}{ maximum depth of the tree. The default
      \code{maxdepth = 0} means that no restrictions are applied to tree
      sizes.}
  \item{remove_weights}{ a logical determining if weights attached to
      nodes shall be removed after fitting the tree.}
}
\details{
  The arguments \code{teststat}, \code{testtype} and \code{mincriterion}
  determine how the global null hypothesis of independence between all
  input variables and the response is tested (see \code{\link{ctree}}).
  The argument \code{nresample} is the number of Monte-Carlo replications
  to be used when \code{testtype = "MonteCarlo"}.

  A split is established when the sum of the weights in both daughter
  nodes is larger than \code{minsplit}; this avoids pathological splits at
  the borders. When \code{stump = TRUE}, a tree with at most two terminal
  nodes is computed.

  The argument \code{mtry > 0} means that a random forest like `variable
  selection', i.e., a random selection of \code{mtry} input variables, is
  performed in each node.
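  For example, a minimal sketch of a single tree with random preselection
  of two input variables (data set and \code{mtry} value chosen
  arbitrarily for illustration):
  \preformatted{
airq <- subset(airquality, !is.na(Ozone))
ct <- ctree(Ozone ~ ., data = airq,
            controls = ctree_control(mtry = 2))
  }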
It might be informative to look at scatterplots of input variables against the standardized two-sample split statistics, those are available when \code{savesplitstats = TRUE}. Each node is then associated with a vector whose length is determined by the number of observations in the learning sample and thus much more memory is required. } \value{ An object of class \code{\link{TreeControl}}. } \keyword{misc} party/man/cforest.Rd0000644000176200001440000002116614010221452014115 0ustar liggesusers\name{cforest} \alias{cforest} \alias{proximity} \title{ Random Forest } \description{ An implementation of the random forest and bagging ensemble algorithms utilizing conditional inference trees as base learners. } \usage{ cforest(formula, data = list(), subset = NULL, weights = NULL, controls = cforest_unbiased(), xtrafo = ptrafo, ytrafo = ptrafo, scores = NULL) proximity(object, newdata = NULL) } \arguments{ \item{formula}{ a symbolic description of the model to be fit. Note that symbols like \code{:} and \code{-} will not work and the tree will make use of all variables listed on the rhs of \code{formula}.} \item{data}{ an data frame containing the variables in the model. } \item{subset}{ an optional vector specifying a subset of observations to be used in the fitting process.} \item{weights}{ an optional vector of weights to be used in the fitting process. Non-negative integer valued weights are allowed as well as non-negative real weights. Observations are sampled (with or without replacement) according to probabilities \code{weights / sum(weights)}. The fraction of observations to be sampled (without replacement) is computed based on the sum of the weights if all weights are integer-valued and based on the number of weights greater zero else. Alternatively, \code{weights} can be a double matrix defining case weights for all \code{ncol(weights)} trees in the forest directly. This requires more storage but gives the user more control.} \item{controls}{an object of class \code{\link{ForestControl-class}}, which can be obtained using \code{\link{cforest_control}} (and its convenience interfaces \code{cforest_unbiased} and \code{cforest_classical}).} \item{xtrafo}{ a function to be applied to all input variables. By default, the \code{\link{ptrafo}} function is applied.} \item{ytrafo}{ a function to be applied to all response variables. By default, the \code{\link{ptrafo}} function is applied.} \item{scores}{ an optional named list of scores to be attached to ordered factors.} \item{object}{ an object as returned by \code{cforest}.} \item{newdata}{ an optional data frame containing test data.} } \details{ This implementation of the random forest (and bagging) algorithm differs from the reference implementation in \code{\link[randomForest]{randomForest}} with respect to the base learners used and the aggregation scheme applied. Conditional inference trees, see \code{\link{ctree}}, are fitted to each of the \code{ntree} (defined via \code{\link{cforest_control}}) bootstrap samples of the learning sample. Most of the hyper parameters in \code{\link{cforest_control}} regulate the construction of the conditional inference trees. Therefore you MUST NOT change anything you don't understand completely. Hyper parameters you might want to change in \code{\link{cforest_control}} are: 1. 
The number of randomly preselected variables \code{mtry}, which is fixed to the value 5 by default here for technical reasons, while in \code{\link[randomForest]{randomForest}} the default values for classification and regression vary with the number of input variables. 2. The number of trees \code{ntree}. Use more trees if you have more variables. 3. The depth of the trees, regulated by \code{mincriterion}. Usually unstopped and unpruned trees are used in random forests. To grow large trees, set \code{mincriterion} to a small value. The aggregation scheme works by averaging observation weights extracted from each of the \code{ntree} trees and NOT by averaging predictions directly as in \code{\link[randomForest]{randomForest}}. See Hothorn et al. (2004) for a description. Predictions can be computed using \code{\link{predict}}. For observations with zero weights, predictions are computed from the fitted tree when \code{newdata = NULL}. While \code{\link{predict}} returns predictions of the same type as the response in the data set by default (i.e., predicted class labels for factors), \code{\link{treeresponse}} returns the statistics of the conditional distribution of the response (i.e., predicted class probabilities for factors). The same is done by \code{predict(..., type = "prob")}. Note that for multivariate responses \code{predict} does not convert predictions to the type of the response, i.e., \code{type = "prob"} is used. Ensembles of conditional inference trees have not yet been extensively tested, so this routine is meant for the expert user only and its current state is rather experimental. However, there are some things available in \code{\link{cforest}} that can't be done with \code{\link[randomForest]{randomForest}}, for example fitting forests to censored response variables (see Hothorn et al., 2006a) or to multivariate and ordered responses. Moreover, when predictors vary in their scale of measurement of number of categories, variable selection and computation of variable importance is biased in favor of variables with many potential cutpoints in \code{\link[randomForest]{randomForest}}, while in \code{\link{cforest}} unbiased trees and an adequate resampling scheme are used by default. See Hothorn et al. (2006b) and Strobl et al. (2007) as well as Strobl et al. (2009). The \code{proximity} matrix is an \eqn{n \times n} matrix \eqn{P} with \eqn{P_{ij}} equal to the fraction of trees where observations \eqn{i} and \eqn{j} are element of the same terminal node (when both \eqn{i} and \eqn{j} had non-zero weights in the same bootstrap sample). } \value{ An object of class \code{\link{RandomForest-class}}. } \references{ Leo Breiman (2001). Random Forests. \emph{Machine Learning}, 45(1), 5--32. Torsten Hothorn, Berthold Lausen, Axel Benner and Martin Radespiel-Troeger (2004). Bagging Survival Trees. \emph{Statistics in Medicine}, \bold{23}(1), 77--91. Torsten Hothorn, Peter Buhlmann, Sandrine Dudoit, Annette Molinaro and Mark J. van der Laan (2006a). Survival Ensembles. \emph{Biostatistics}, \bold{7}(3), 355--373. Torsten Hothorn, Kurt Hornik and Achim Zeileis (2006b). Unbiased Recursive Partitioning: A Conditional Inference Framework. \emph{Journal of Computational and Graphical Statistics}, \bold{15}(3), 651--674. Preprint available from \url{https://www.zeileis.org/papers/Hothorn+Hornik+Zeileis-2006.pdf} Carolin Strobl, Anne-Laure Boulesteix, Achim Zeileis and Torsten Hothorn (2007). Bias in Random Forest Variable Importance Measures: Illustrations, Sources and a Solution. 
\emph{BMC Bioinformatics}, \bold{8}, 25. \doi{10.1186/1471-2105-8-25} Carolin Strobl, James Malley and Gerhard Tutz (2009). An Introduction to Recursive Partitioning: Rationale, Application, and Characteristics of Classification and Regression Trees, Bagging, and Random forests. \emph{Psychological Methods}, \bold{14}(4), 323--348. } \examples{ set.seed(290875) ### honest (i.e., out-of-bag) cross-classification of ### true vs. predicted classes data("mammoexp", package = "TH.data") table(mammoexp$ME, predict(cforest(ME ~ ., data = mammoexp, control = cforest_unbiased(ntree = 50)), OOB = TRUE)) ### fit forest to censored response if (require("TH.data") && require("survival")) { data("GBSG2", package = "TH.data") bst <- cforest(Surv(time, cens) ~ ., data = GBSG2, control = cforest_unbiased(ntree = 50)) ### estimate conditional Kaplan-Meier curves treeresponse(bst, newdata = GBSG2[1:2,], OOB = TRUE) ### if you can't resist to look at individual trees ... party:::prettytree(bst@ensemble[[1]], names(bst@data@get("input"))) } ### proximity, see ?randomForest iris.cf <- cforest(Species ~ ., data = iris, control = cforest_unbiased(mtry = 2)) iris.mds <- cmdscale(1 - proximity(iris.cf), eig = TRUE) op <- par(pty="s") pairs(cbind(iris[,1:4], iris.mds$points), cex = 0.6, gap = 0, col = c("red", "green", "blue")[as.numeric(iris$Species)], main = "Iris Data: Predictors and MDS of Proximity Based on cforest") par(op) } \keyword{tree} party/man/RandomForest-class.Rd0000644000176200001440000000471712265463363016204 0ustar liggesusers\name{RandomForest-class} \docType{class} \alias{RandomForest-class} \alias{treeresponse,RandomForest-method} \alias{weights,RandomForest-method} \alias{where,RandomForest-method} \alias{show,RandomForest-method} \title{Class "RandomForest"} \description{A class for representing random forest ensembles. } \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("RandomForest", ...)}. } \section{Slots}{ \describe{ \item{\code{ensemble}:}{Object of class \code{"list"}, each element being an object of class \code{"\linkS4class{BinaryTree}"}.} \item{\code{data}:}{ an object of class \code{"\linkS4class{ModelEnv}"}.} \item{\code{initweights}:}{ a vector of initial weights.} \item{\code{weights}:}{ a list of weights defining the sub-samples.} \item{\code{where}:}{ a matrix of integers vectors of length n (number of observations in the learning sample) giving the number of the terminal node the corresponding observations is element of (in each tree).} \item{\code{data}:}{ an object of class \code{"\linkS4class{ModelEnv}"}.} \item{\code{responses}:}{ an object of class \code{"VariableFrame"} storing the values of the response variable(s). } \item{\code{cond_distr_response}:}{ a function computing the conditional distribution of the response. } \item{\code{predict_response}:}{ a function for computing predictions. } \item{\code{prediction_weights}:}{ a function for extracting weights from terminal nodes. } \item{\code{get_where}:}{ a function for determining the number of terminal nodes observations fall into. } \item{\code{update}:}{ a function for updating weights.} } } \section{Methods}{ \describe{ \item{treeresponse}{\code{signature(object = "RandomForest")}: ... } \item{weights}{\code{signature(object = "RandomForest")}: ... } \item{where}{\code{signature(object = "RandomForest")}: ... } } } \examples{ set.seed(290875) ### honest (i.e., out-of-bag) cross-classification of ### true vs. 
predicted classes data("mammoexp", package = "TH.data") table(mammoexp$ME, predict(cforest(ME ~ ., data = mammoexp, control = cforest_unbiased(ntree = 50)), OOB = TRUE)) } \keyword{classes} party/man/readingSkills.Rd0000644000176200001440000000332211710236471015250 0ustar liggesusers\name{readingSkills} \alias{readingSkills} \docType{data} \title{ Reading Skills } \description{ A toy data set illustrating the spurious correlation between reading skills and shoe size in school-children. } \usage{data("readingSkills")} \format{ A data frame with 200 observations on the following 4 variables. \describe{ \item{\code{nativeSpeaker}}{a factor with levels \code{no} and \code{yes}, where \code{yes} indicates that the child is a native speaker of the language of the reading test.} \item{\code{age}}{age of the child in years.} \item{\code{shoeSize}}{shoe size of the child in cm.} \item{\code{score}}{raw score on the reading test.} } } \details{ In this artificial data set, which was generated by means of a linear model, \code{age} and \code{nativeSpeaker} are actual predictors of the \code{score}, while the spurious correlation between \code{score} and \code{shoeSize} is merely caused by the fact that both depend on \code{age}. The true predictors can be identified, e.g., by means of partial correlations, standardized beta coefficients in linear models or the conditional random forest variable importance, but not by means of the standard random forest variable importance (see example). } \examples{ set.seed(290875) readingSkills.cf <- cforest(score ~ ., data = readingSkills, control = cforest_unbiased(mtry = 2, ntree = 50)) # standard importance varimp(readingSkills.cf) # the same modulo random variation varimp(readingSkills.cf, pre1.0_0 = TRUE) # conditional importance, may take a while... varimp(readingSkills.cf, conditional = TRUE) } \keyword{datasets} party/man/party_intern.Rd0000644000176200001440000000077213213722761015173 0ustar liggesusers\name{party_intern} \alias{party_intern} \title{ Call internal functions. } \description{ Call one of the internal party functions. } \usage{ party_intern(..., fun = c("R_TreeGrow", "R_get_nodeID", "R_getpredictions", "initVariableFrame", "ctreedpp", "newinputs", "R_predict")) } \arguments{ \item{\dots}{Arguments to \code{fun}.} \item{fun}{The name of an internal party function.} } \details{ This function must not be called under any circumstances. } \keyword{internal} party/man/initialize-methods.Rd0000644000176200001440000000167111674356046016266 0ustar liggesusers\name{Initialize Methods} \docType{methods} \alias{initialize} \alias{initialize-methods} \alias{initialize,ExpectCovarInfluence-method} \alias{initialize,ExpectCovar-method} \alias{initialize,LinStatExpectCovar-method} \alias{initialize,LinStatExpectCovarMPinv-method} \alias{initialize,svd_mem-method} \alias{initialize,VariableFrame-method} \title{ Methods for Function initialize in Package `party' } \description{ Methods for function \code{initialize} in package \pkg{party} -- those are internal functions not to be called by users.
} \section{Methods}{ \describe{ \item{.Object = "ExpectCovarInfluence"}{\code{new("ExpectCovarInfluence")}} \item{.Object = "ExpectCovar"}{\code{new("ExpectCovar")}} \item{.Object = "LinStatExpectCovar"}{\code{new("LinStatExpectCovar")}} \item{.Object = "LinStatExpectCovarMPinv"}{\code{new("LinStatExpectCovarMPinv")}} \item{.Object = "VariableFrame"}{\code{new("VariableFrame")}} }} \keyword{methods} party/man/BinaryTree-class.Rd0000644000176200001440000001373212062322013015627 0ustar liggesusers\name{BinaryTree Class} \docType{class} \alias{BinaryTree-class} \alias{weights} \alias{weights-methods} \alias{weights,BinaryTree-method} \alias{show,BinaryTree-method} \alias{where} \alias{where-methods} \alias{where,BinaryTree-method} \alias{response} \alias{response-methods} \alias{response,BinaryTree-method} \alias{nodes} \alias{nodes-methods} \alias{nodes,BinaryTree,integer-method} \alias{nodes,BinaryTree,numeric-method} \alias{treeresponse} \alias{treeresponse-methods} \alias{treeresponse,BinaryTree-method} \title{Class "BinaryTree"} \description{A class for representing binary trees.} \section{Objects from the Class}{ Objects can be created by calls of the form \code{new("BinaryTree", ...)}. The most important slot is \code{tree}, a (recursive) list with elements \describe{ \item{nodeID}{ an integer giving the number of the node, starting with \code{1} in the root node.} \item{weights}{ the case weights (of the learning sample) corresponding to this node.} \item{criterion}{ a list with test statistics and p-values for each partial hypothesis.} \item{terminal}{ a logical specifying if this is a terminal node.} \item{psplit}{ primary split: a list with elements \code{variableID} (the number of the input variable that was split), \code{ordered} (a logical indicating whether the input variable is ordered), \code{splitpoint} (the cutpoint or the set of levels sent to the left), and \code{splitstatistics}, which stores the sequence of standardized two-sample statistics on which the split point estimation is based. The logical \code{toleft} determines if observations go left or right down the tree. For nominal splits, the slot \code{table} is a vector with entries greater than zero if the corresponding level is available in the corresponding node.} \item{ssplits}{ a list of surrogate splits, each with the same elements as \code{psplit}.} \item{prediction}{ the prediction of the node: the mean for numeric responses and the conditional class probabilities for nominal or ordered responses. For censored responses, this is the mean of the logrank scores and useless as such.} \item{left}{ a list representing the left daughter node. } \item{right}{ a list representing the right daughter node.} } Please note that this data structure may be subject to change in future releases of the package. } \section{Slots}{ \describe{ \item{\code{data}:}{ an object of class \code{"\linkS4class{ModelEnv}"}.} \item{\code{responses}:}{ an object of class \code{"VariableFrame"} storing the values of the response variable(s). } \item{\code{cond_distr_response}:}{ a function computing the conditional distribution of the response. } \item{\code{predict_response}:}{ a function for computing predictions. } \item{\code{tree}:}{ a recursive list representing the tree. See above. } \item{\code{where}:}{ an integer vector of length n (number of observations in the learning sample) giving the number of the terminal node the corresponding observation is an element of. } \item{\code{prediction_weights}:}{ a function for extracting weights from terminal nodes.
} \item{\code{get_where}:}{ a function for determining the numbers of the terminal nodes observations fall into. } \item{\code{update}:}{ a function for updating weights.} } } \section{Extends}{ Class \code{"BinaryTreePartition"}, directly. } \section{Methods}{ \describe{ \item{\code{response(object, ...)}:}{extract the response variables the tree was fitted to.} \item{\code{treeresponse(object, newdata = NULL, ...)}:}{compute statistics for the conditional distribution of the response as modelled by the tree. For regression problems, this is just the mean. For nominal or ordered responses, estimated conditional class probabilities are returned. Kaplan-Meier curves are computed for censored responses. Note that a list with one element for each observation is returned.} \item{\code{Predict(object, newdata = NULL, ...)}:}{ compute predictions.} \item{\code{weights(object, newdata = NULL, ...)}:}{ extract the weight vector from the terminal nodes each element of the learning sample is an element of (\code{newdata = NULL}) and for new observations, respectively.} \item{\code{where(object, newdata = NULL, ...)}:}{ extract the numbers of the terminal nodes each element of the learning sample is an element of (\code{newdata = NULL}) and for new observations, respectively.} \item{\code{nodes(object, where, ...)}:}{ extract the nodes with given number (\code{where}).} \item{\code{plot(x, ...)}:}{ a plot method for \code{BinaryTree} objects, see \code{\link{plot.BinaryTree}}.} \item{\code{print(x, ...)}:}{ a print method for \code{BinaryTree} objects.} } } \examples{ set.seed(290875) airq <- subset(airquality, !is.na(Ozone)) airct <- ctree(Ozone ~ ., data = airq, controls = ctree_control(maxsurrogate = 3)) ### distribution of responses in the terminal nodes plot(airq$Ozone ~ as.factor(where(airct))) ### get all terminal nodes from the tree nodes(airct, unique(where(airct))) ### extract weights and compute predictions pmean <- sapply(weights(airct), function(w) weighted.mean(airq$Ozone, w)) ### the same as drop(Predict(airct)) ### or unlist(treeresponse(airct)) ### don't use the mean but the median as prediction in each terminal node pmedian <- sapply(weights(airct), function(w) median(airq$Ozone[rep(1:nrow(airq), w)])) plot(airq$Ozone, pmean, col = "red") points(airq$Ozone, pmedian, col = "blue") } \keyword{classes} party/man/ctree.Rd0000644000176200001440000001711114010221466013556 0ustar liggesusers\name{Conditional Inference Trees} \alias{ctree} \alias{conditionalTree} \title{ Conditional Inference Trees } \description{ Recursive partitioning for continuous, censored, ordered, nominal and multivariate response variables in a conditional inference framework. } \usage{ ctree(formula, data, subset = NULL, weights = NULL, controls = ctree_control(), xtrafo = ptrafo, ytrafo = ptrafo, scores = NULL) } \arguments{ \item{formula}{ a symbolic description of the model to be fit. Note that symbols like \code{:} and \code{-} will not work and the tree will make use of all variables listed on the rhs of \code{formula}.} \item{data}{ a data frame containing the variables in the model. } \item{subset}{ an optional vector specifying a subset of observations to be used in the fitting process.} \item{weights}{ an optional vector of weights to be used in the fitting process. Only non-negative integer valued weights are allowed.} \item{controls}{an object of class \code{\link{TreeControl}}, which can be obtained using \code{\link{ctree_control}}.} \item{xtrafo}{ a function to be applied to all input variables.
By default, the \code{\link{ptrafo}} function is applied.} \item{ytrafo}{ a function to be applied to all response variables. By default, the \code{\link{ptrafo}} function is applied.} \item{scores}{ an optional named list of scores to be attached to ordered factors.} } \details{ Conditional inference trees estimate a regression relationship by binary recursive partitioning in a conditional inference framework. Roughly, the algorithm works as follows: 1) Test the global null hypothesis of independence between any of the input variables and the response (which may be multivariate as well). Stop if this hypothesis cannot be rejected. Otherwise select the input variable with the strongest association to the response. This association is measured by a p-value corresponding to a test for the partial null hypothesis of a single input variable and the response. 2) Implement a binary split in the selected input variable. 3) Recursively repeat steps 1) and 2). The implementation utilizes a unified framework for conditional inference, or permutation tests, developed by Strasser and Weber (1999). The stop criterion in step 1) is either based on multiplicity adjusted p-values (\code{testtype == "Bonferroni"} or \code{testtype == "MonteCarlo"} in \code{\link{ctree_control}}), on the univariate p-values (\code{testtype == "Univariate"}), or on values of the test statistic (\code{testtype == "Teststatistic"}). In all cases, the criterion is maximized, i.e., 1 - p-value is used. A split is implemented when the criterion exceeds the value given by \code{mincriterion} as specified in \code{\link{ctree_control}}. For example, when \code{mincriterion = 0.95}, the p-value must be smaller than $0.05$ in order to split this node. This statistical approach ensures that the right-sized tree is grown and no form of pruning or cross-validation whatsoever is needed. The selection of the input variable to split in is based on the univariate p-values, avoiding a variable selection bias towards input variables with many possible cutpoints. Multiplicity-adjusted Monte-Carlo p-values are computed following a "min-p" approach. The univariate p-values based on the limiting distribution (chi-square or normal) are computed for each of the random permutations of the data. This means that one should use a quadratic test statistic when factors are in play (because the evaluation of the corresponding multivariate normal distribution is time-consuming). By default, the scores for each ordinal factor \code{x} are \code{1:length(x)}; this may be changed using \code{scores = list(x = c(1,5,6))}, for example. Predictions can be computed using \code{\link{predict}} or \code{\link{treeresponse}}. The first function accepts arguments \code{type = c("response", "node", "prob")} where \code{type = "response"} returns predicted means, predicted classes or median predicted survival times, \code{type = "node"} returns terminal node IDs (identical to \code{\link{where}}) and \code{type = "prob"} gives more information about the conditional distribution of the response, i.e., class probabilities or predicted Kaplan-Meier curves and is identical to \code{\link{treeresponse}}. For observations with zero weights, predictions are computed from the fitted tree when \code{newdata = NULL}. For a general description of the methodology see Hothorn, Hornik and Zeileis (2006) and Hothorn, Hornik, van de Wiel and Zeileis (2006). Introductions for novices can be found in Strobl et al. (2009) and at \url{https://github.com/christophM/overview-ctrees}.
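As a minimal sketch of how these prediction types relate to each other (this merely reuses the \code{airquality} regression example from the examples section below and illustrates calls already documented here, nothing new): \preformatted{
airq <- subset(airquality, !is.na(Ozone))
airct <- ctree(Ozone ~ ., data = airq)

head(predict(airct, type = "response"))  ## predicted means
head(predict(airct, type = "node"))      ## terminal node IDs
## type = "node" carries the same information as where():
stopifnot(all.equal(predict(airct, type = "node"), where(airct)))
## type = "prob" describes the conditional distribution of the
## response and corresponds to treeresponse(), one list element
## per observation:
tr <- predict(airct, type = "prob")
}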
} \value{ An object of class \code{\link{BinaryTree-class}}. } \references{ Helmut Strasser and Christian Weber (1999). On the asymptotic theory of permutation statistics. \emph{Mathematical Methods of Statistics}, \bold{8}, 220--250. Torsten Hothorn, Kurt Hornik, Mark A. van de Wiel and Achim Zeileis (2006). A Lego System for Conditional Inference. \emph{The American Statistician}, \bold{60}(3), 257--263. Torsten Hothorn, Kurt Hornik and Achim Zeileis (2006). Unbiased Recursive Partitioning: A Conditional Inference Framework. \emph{Journal of Computational and Graphical Statistics}, \bold{15}(3), 651--674. Preprint available from \url{https://www.zeileis.org/papers/Hothorn+Hornik+Zeileis-2006.pdf} Carolin Strobl, James Malley and Gerhard Tutz (2009). An Introduction to Recursive Partitioning: Rationale, Application, and Characteristics of Classification and Regression Trees, Bagging, and Random forests. \emph{Psychological Methods}, \bold{14}(4), 323--348. } \examples{ set.seed(290875) ### regression airq <- subset(airquality, !is.na(Ozone)) airct <- ctree(Ozone ~ ., data = airq, controls = ctree_control(maxsurrogate = 3)) airct plot(airct) mean((airq$Ozone - predict(airct))^2) ### extract terminal node ID, two ways all.equal(predict(airct, type = "node"), where(airct)) ### classification irisct <- ctree(Species ~ .,data = iris) irisct plot(irisct) table(predict(irisct), iris$Species) ### estimated class probabilities, a list tr <- treeresponse(irisct, newdata = iris[1:10,]) ### ordinal regression data("mammoexp", package = "TH.data") mammoct <- ctree(ME ~ ., data = mammoexp) plot(mammoct) ### estimated class probabilities treeresponse(mammoct, newdata = mammoexp[1:10,]) ### survival analysis if (require("TH.data") && require("survival")) { data("GBSG2", package = "TH.data") GBSG2ct <- ctree(Surv(time, cens) ~ .,data = GBSG2) plot(GBSG2ct) treeresponse(GBSG2ct, newdata = GBSG2[1:2,]) } ### if you are interested in the internals: ### generate doxygen documentation \dontrun{ ### download src package into temp dir tmpdir <- tempdir() tgz <- download.packages("party", destdir = tmpdir)[2] ### extract untar(tgz, exdir = tmpdir) wd <- setwd(file.path(tmpdir, "party")) ### run doxygen (assuming it is there) system("doxygen inst/doxygen.cfg") setwd(wd) ### have fun browseURL(file.path(tmpdir, "party", "inst", "documentation", "html", "index.html")) } } \keyword{tree} party/man/varimp.Rd0000644000176200001440000001416614010221477013757 0ustar liggesusers\name{varimp} \alias{varimp} \alias{varimpAUC} \title{ Variable Importance } \description{ Standard and conditional variable importance for `cforest', following the permutation principle of the `mean decrease in accuracy' importance in `randomForest'. } \usage{ varimp(object, mincriterion = 0, conditional = FALSE, threshold = 0.2, nperm = 1, OOB = TRUE, pre1.0_0 = conditional) varimpAUC(...) } \arguments{ \item{object}{ an object as returned by \code{cforest}.} \item{mincriterion}{ the value of the test statistic or 1 - p-value that must be exceeded in order to include a split in the computation of the importance. The default \code{mincriterion = 0} guarantees that all splits are included.} \item{conditional}{ a logical determining whether unconditional or conditional computation of the importance is performed. 
} \item{threshold}{ the threshold value for (1 - p-value) of the association between the variable of interest and a covariate, which must be exceeded in order to include the covariate in the conditioning scheme for the variable of interest (only relevant if \code{conditional = TRUE}). A threshold value of zero includes all covariates.} \item{nperm}{ the number of permutations performed.} \item{OOB}{ a logical determining whether the importance is computed from the out-of-bag sample or the learning sample (not suggested).} \item{pre1.0_0}{ Prior to party version 1.0-0, the actual data values were permuted according to the original permutation importance suggested by Breiman (2001). Now the assignments to child nodes of splits in the variable of interest are permuted as described by Hapfelmeier et al. (2012), which allows for missing values in the explanatory variables and is more efficient with respect to memory consumption and computing time. This method does not apply to conditional variable importances.} \item{\dots}{Arguments to \code{\link[varImp]{varImpAUC}}.} } \details{ Function \code{varimp} can be used to compute variable importance measures similar to those computed by \code{\link[randomForest]{importance}}. Besides the standard version, a conditional version is available that adjusts for correlations between predictor variables. If \code{conditional = TRUE}, the importance of each variable is computed by permuting within a grid defined by the covariates that are associated (with 1 - p-value greater than \code{threshold}) to the variable of interest. The resulting variable importance score is conditional in the sense of beta coefficients in regression models, but represents the effect of a variable in both main effects and interactions. See Strobl et al. (2008) for details. Note, however, that all random forest results are subject to random variation. Thus, before interpreting the importance ranking, check whether the same ranking is achieved with a different random seed -- or otherwise increase the number of trees \code{ntree} in \code{\link{ctree_control}}. Note that in the presence of missing values in the predictor variables the procedure described in Hapfelmeier et al. (2012) is performed. Function \code{varimpAUC} is a wrapper for \code{\link[varImp]{varImpAUC}} which implements AUC-based variable importances as described by Janitza et al. (2013). Here, the area under the curve instead of the accuracy is used to calculate the importance of each variable. This AUC-based variable importance measure is more robust towards class imbalance. For right-censored responses, \code{varimp} uses the integrated Brier score as a risk measure for computing variable importances. This feature is extremely slow and experimental; use at your own risk. } \value{ A vector of `mean decrease in accuracy' importance scores. } \references{ Leo Breiman (2001). Random Forests. \emph{Machine Learning}, \bold{45}(1), 5--32. Alexander Hapfelmeier, Torsten Hothorn, Kurt Ulm, and Carolin Strobl (2012). A New Variable Importance Measure for Random Forests with Missing Data. \emph{Statistics and Computing}, \doi{10.1007/s11222-012-9349-1} Torsten Hothorn, Kurt Hornik, and Achim Zeileis (2006b). Unbiased Recursive Partitioning: A Conditional Inference Framework. \emph{Journal of Computational and Graphical Statistics}, \bold{15}(3), 651--674. Preprint available from \url{https://www.zeileis.org/papers/Hothorn+Hornik+Zeileis-2006.pdf} Silke Janitza, Carolin Strobl and Anne-Laure Boulesteix (2013).
An AUC-based Permutation Variable Importance Measure for Random Forests. \emph{BMC Bioinformatics}, \bold{14}, 119. \doi{10.1186/1471-2105-14-119} Carolin Strobl, Anne-Laure Boulesteix, Thomas Kneib, Thomas Augustin, and Achim Zeileis (2008). Conditional Variable Importance for Random Forests. \emph{BMC Bioinformatics}, \bold{9}, 307. \doi{10.1186/1471-2105-9-307} } \examples{ set.seed(290875) readingSkills.cf <- cforest(score ~ ., data = readingSkills, control = cforest_unbiased(mtry = 2, ntree = 50)) # standard importance varimp(readingSkills.cf) # the same modulo random variation varimp(readingSkills.cf, pre1.0_0 = TRUE) # conditional importance, may take a while... varimp(readingSkills.cf, conditional = TRUE) \dontrun{ data("GBSG2", package = "TH.data") ### add a random covariate for sanity check set.seed(29) GBSG2$rand <- runif(nrow(GBSG2)) object <- cforest(Surv(time, cens) ~ ., data = GBSG2, control = cforest_unbiased(ntree = 20)) vi <- varimp(object) ### compare variable importances and absolute z-statistics layout(matrix(1:2)) barplot(vi) barplot(abs(summary(coxph(Surv(time, cens) ~ ., data = GBSG2))$coeff[,"z"])) ### looks more or less the same } } \keyword{tree} party/man/fit-methods.Rd0000644000176200001440000000063311674356046014704 0ustar liggesusers\name{Fit Methods} \docType{methods} \alias{fit-methods} \alias{fit,StatModel,LearningSample-method} \title{ Fit `StatModel' Objects to Data } \description{ Fit a `StatModel' model to objects of class `LearningSample'. } \section{Methods}{ \describe{ \item{fit}{\code{signature(model = "StatModel", data = "LearningSample")}: fit \code{model} to \code{data}.} } } \keyword{methods} party/man/ForestControl-class.Rd0000644000176200001440000000204211717447641016373 0ustar liggesusers\name{ForestControl-class} \docType{class} \alias{ForestControl-class} \title{Class "ForestControl" } \description{ Objects of this class represent the hyperparameter setting for forest growing. } \section{Objects from the Class}{ Objects can be created by \code{\link{cforest_control}}. } \section{Slots}{ \describe{ \item{\code{ntree}:}{number of trees in the forest.} \item{\code{replace}:}{sampling with or without replacement.} \item{\code{fraction}:}{fraction of observations to sample without replacement.} \item{\code{trace}:}{logical indicating if a progress bar shall be printed.} \item{\code{varctrl}:}{Object of class \code{"VariableControl"}} \item{\code{splitctrl}:}{Object of class \code{"SplitControl"}} \item{\code{gtctrl}:}{Object of class \code{"GlobalTestControl"}} \item{\code{tgctrl}:}{Object of class \code{"TreeGrowControl"}} } } \section{Extends}{ Class \code{"TreeControl"}, directly. } \section{Methods}{ No methods defined with class "ForestControl" in the signature. } \keyword{classes} party/DESCRIPTION0000644000176200001440000000460614124331102013114 0ustar liggesusersPackage: party Title: A Laboratory for Recursive Partytioning Date: 2021-09-27 Version: 1.3-9 Authors@R: c(person("Torsten", "Hothorn", role = c("aut", "cre"), email = "Torsten.Hothorn@R-project.org", comment = c(ORCID = "0000-0001-8301-0471")), person("Kurt", "Hornik", role = "aut"), person("Carolin", "Strobl", role = "aut"), person("Achim", "Zeileis", role = "aut", email = "Achim.Zeileis@R-project.org", comment = c(ORCID = "0000-0003-0918-3766"))) Description: A computational toolbox for recursive partitioning.
The core of the package is ctree(), an implementation of conditional inference trees which embed tree-structured regression models into a well-defined theory of conditional inference procedures. This non-parametric class of regression trees is applicable to all kinds of regression problems, including nominal, ordinal, numeric, censored as well as multivariate response variables and arbitrary measurement scales of the covariates. Based on conditional inference trees, cforest() provides an implementation of Breiman's random forests. The function mob() implements an algorithm for recursive partitioning based on parametric models (e.g. linear models, GLMs or survival regression) employing parameter instability tests for split selection. Extensible functionality for visualizing tree-structured regression models is available. The methods are described in Hothorn et al. (2006), Zeileis et al. (2008) and Strobl et al. (2007). Depends: R (>= 3.0.0), methods, grid, stats, mvtnorm (>= 1.0-2), modeltools (>= 0.2-21), strucchange LinkingTo: mvtnorm Imports: survival (>= 2.37-7), coin (>= 1.1-0), zoo, sandwich (>= 1.1-1) Suggests: TH.data (>= 1.0-3), mlbench, colorspace, MASS, vcd, ipred, varImp, randomForest LazyData: yes License: GPL-2 URL: http://party.R-forge.R-project.org NeedsCompilation: yes Packaged: 2021-09-27 11:30:45 UTC; hothorn Author: Torsten Hothorn [aut, cre] (), Kurt Hornik [aut], Carolin Strobl [aut], Achim Zeileis [aut] () Maintainer: Torsten Hothorn Repository: CRAN Date/Publication: 2021-09-27 12:00:02 UTC party/build/0000755000176200001440000000000014124325544012514 5ustar liggesusersparty/build/vignette.rds0000644000176200001440000000051614124325544015055 0ustar liggesusers[binary archive members omitted]
party/tests/0000755000176200001440000000000014124325545012560 5ustar liggesusersparty/tests/RandomForest-regtest.Rout.save0000644000176200001440000001600414110652160020436 0ustar liggesusers R version 4.1.1 (2021-08-10) -- "Kick Things" Copyright (C) 2021 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R.
> > RNGversion("3.5.2") Warning message: In RNGkind("Mersenne-Twister", "Inversion", "Rounding") : non-uniform 'Rounding' sampler used > set.seed(290875) > library("party") Loading required package: grid Loading required package: mvtnorm Loading required package: modeltools Loading required package: stats4 Loading required package: strucchange Loading required package: zoo Attaching package: 'zoo' The following objects are masked from 'package:base': as.Date, as.Date.numeric Loading required package: sandwich > if (!require("TH.data")) + stop("cannot load package TH.data") Loading required package: TH.data Loading required package: survival Loading required package: MASS Attaching package: 'TH.data' The following object is masked from 'package:MASS': geyser > if (!require("coin")) + stop("cannot load package coin") Loading required package: coin > > data("GlaucomaM", package = "TH.data") > rf <- cforest(Class ~ ., data = GlaucomaM, control = cforest_unbiased(ntree = 30)) > stopifnot(mean(GlaucomaM$Class != predict(rf)) < + mean(GlaucomaM$Class != predict(rf, OOB = TRUE))) > > data("GBSG2", package = "TH.data") > rfS <- cforest(Surv(time, cens) ~ ., data = GBSG2, control = cforest_unbiased(ntree = 30)) > treeresponse(rfS, newdata = GBSG2[1:2,]) $`1` Call: survfit(formula = y ~ 1, weights = weights) records n events median 0.95LCL 0.95UCL [1,] 146 30 15.9 1753 1481 NA $`2` Call: survfit(formula = y ~ 1, weights = weights) records n events median 0.95LCL 0.95UCL [1,] 148 30 13.4 1975 1343 2018 > > ### give it a try, at least > varimp(rf, pre1.0_0 = TRUE) ag at as an ai 0.0000000000 -0.0023148148 0.0009259259 0.0009259259 0.0078703704 eag eat eas ean eai 0.0000000000 0.0000000000 0.0000000000 0.0013888889 -0.0009259259 abrg abrt abrs abrn abri 0.0000000000 0.0000000000 0.0032407407 0.0027777778 0.0041666667 hic mhcg mhct mhcs mhcn 0.0060185185 0.0000000000 0.0013888889 -0.0004629630 0.0027777778 mhci phcg phct phcs phcn 0.0078703704 0.0060185185 0.0000000000 0.0004629630 0.0018518519 phci hvc vbsg vbst vbss 0.0166666667 0.0032407407 0.0032407407 0.0013888889 0.0000000000 vbsn vbsi vasg vast vass 0.0000000000 0.0000000000 0.0000000000 0.0000000000 0.0000000000 vasn vasi vbrg vbrt vbrs 0.0000000000 0.0046296296 0.0000000000 0.0018518519 0.0004629630 vbrn vbri varg vart vars 0.0032407407 0.0004629630 0.0351851852 0.0000000000 0.0245370370 varn vari mdg mdt mds 0.0129629630 0.0481481481 0.0000000000 0.0000000000 -0.0013888889 mdn mdi tmg tmt tms 0.0000000000 0.0000000000 0.0273148148 0.0000000000 0.0097222222 tmn tmi mr rnf mdic -0.0023148148 0.0226851852 0.0000000000 0.0037037037 0.0055555556 emd mv 0.0000000000 -0.0009259259 > > P <- proximity(rf) > stopifnot(max(abs(P - t(P))) == 0) > > P[1:10,1:10] 2 43 25 65 70 16 6 2 1.0000000 0.26666667 0.7666667 0.20000000 0.10000000 0.13333333 0.70000000 43 0.2666667 1.00000000 0.2000000 0.03333333 0.06666667 0.36666667 0.23333333 25 0.7666667 0.20000000 1.0000000 0.26666667 0.10000000 0.10000000 0.76666667 65 0.2000000 0.03333333 0.2666667 1.00000000 0.00000000 0.03333333 0.33333333 70 0.1000000 0.06666667 0.1000000 0.00000000 1.00000000 0.23333333 0.06666667 16 0.1333333 0.36666667 0.1000000 0.03333333 0.23333333 1.00000000 0.10000000 6 0.7000000 0.23333333 0.7666667 0.33333333 0.06666667 0.10000000 1.00000000 5 0.5333333 0.06666667 0.6000000 0.46666667 0.10000000 0.06666667 0.63333333 12 0.5000000 0.06666667 0.5000000 0.50000000 0.10000000 0.06666667 0.53333333 63 0.4666667 0.23333333 0.5000000 0.23333333 0.16666667 0.13333333 0.56666667 5 12 63 2 
0.53333333 0.50000000 0.4666667 43 0.06666667 0.06666667 0.2333333 25 0.60000000 0.50000000 0.5000000 65 0.46666667 0.50000000 0.2333333 70 0.10000000 0.10000000 0.1666667 16 0.06666667 0.06666667 0.1333333 6 0.63333333 0.53333333 0.5666667 5 1.00000000 0.83333333 0.4333333 12 0.83333333 1.00000000 0.5000000 63 0.43333333 0.50000000 1.0000000 > > ### variable importances > a <- cforest(Species ~ ., data = iris, + control = cforest_unbiased(mtry = 2, ntree = 10)) > varimp(a, pre1.0_0 = TRUE) Sepal.Length Sepal.Width Petal.Length Petal.Width 0.06181818 0.00000000 0.20727273 0.33636364 > varimp(a, conditional = TRUE) Sepal.Length Sepal.Width Petal.Length Petal.Width 0.007272727 0.000000000 0.103636364 0.243636364 > > airq <- subset(airquality, complete.cases(airquality)) > a <- cforest(Ozone ~ ., data = airq, + control = cforest_unbiased(mtry = 2, ntree = 10)) > varimp(a, pre1.0_0 = TRUE) Solar.R Wind Temp Month Day 137.76700 550.19004 295.40387 16.21802 5.42690 > varimp(a, conditional = TRUE) Solar.R Wind Temp Month Day 67.713060 341.413307 227.670123 4.257196 3.204209 > > data("mammoexp", package = "TH.data") > a <- cforest(ME ~ ., data = mammoexp, control = cforest_classical(ntree = 10)) > varimp(a, pre1.0_0 = TRUE) SYMPT PB HIST BSE DECT 0.02466021 0.01046237 0.01607246 0.01045324 0.00133305 > varimp(a, conditional = TRUE) SYMPT PB HIST BSE DECT 0.019882337 0.009532482 0.006163146 0.007732481 0.003382481 > > stopifnot(all.equal(unique(sapply(a@weights, sum)), nrow(mammoexp))) > > ### check user-defined weights > nobs <- nrow(GlaucomaM) > i <- rep(0.0, nobs) > i[1:floor(.632 * nobs)] <- 1 > folds <- replicate(100, sample(i)) > rf2 <- cforest(Class ~ ., data = GlaucomaM, control = cforest_unbiased(ntree = 100), weights = folds) > table(predict(rf), predict(rf2)) glaucoma normal glaucoma 89 4 normal 1 102 > > proc.time() user system elapsed 2.769 0.094 2.847 party/tests/Examples/0000755000176200001440000000000013436246771014346 5ustar liggesusersparty/tests/Examples/party-Ex.Rout.save0000644000176200001440000007032014110661343017653 0ustar liggesusers R version 4.1.1 (2021-08-10) -- "Kick Things" Copyright (C) 2021 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. Natural language support but running in an English locale R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. 
> pkgname <- "party" > source(file.path(R.home("share"), "R", "examples-header.R")) > options(warn = 1) > library('party') Loading required package: grid Loading required package: mvtnorm Loading required package: modeltools Loading required package: stats4 Loading required package: strucchange Loading required package: zoo Attaching package: ‘zoo’ The following objects are masked from ‘package:base’: as.Date, as.Date.numeric Loading required package: sandwich > > base::assign(".oldSearch", base::search(), pos = 'CheckExEnv') > base::assign(".old_wd", base::getwd(), pos = 'CheckExEnv') > cleanEx() > nameEx("BinaryTree-class") > ### * BinaryTree-class > > flush(stderr()); flush(stdout()) > > ### Name: BinaryTree Class > ### Title: Class "BinaryTree" > ### Aliases: BinaryTree-class weights weights-methods > ### weights,BinaryTree-method show,BinaryTree-method where where-methods > ### where,BinaryTree-method response response-methods > ### response,BinaryTree-method nodes nodes-methods > ### nodes,BinaryTree,integer-method nodes,BinaryTree,numeric-method > ### treeresponse treeresponse-methods treeresponse,BinaryTree-method > ### Keywords: classes > > ### ** Examples > > > set.seed(290875) > > airq <- subset(airquality, !is.na(Ozone)) > airct <- ctree(Ozone ~ ., data = airq, + controls = ctree_control(maxsurrogate = 3)) > > ### distribution of responses in the terminal nodes > plot(airq$Ozone ~ as.factor(where(airct))) > > ### get all terminal nodes from the tree > nodes(airct, unique(where(airct))) [[1]] 5)* weights = 48 [[2]] 3)* weights = 10 [[3]] 6)* weights = 21 [[4]] 9)* weights = 7 [[5]] 8)* weights = 30 > > ### extract weights and compute predictions > pmean <- sapply(weights(airct), function(w) weighted.mean(airq$Ozone, w)) > > ### the same as > drop(Predict(airct)) [1] 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 [9] 55.60000 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 [17] 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 31.14286 [25] 55.60000 18.47917 31.14286 48.71429 48.71429 31.14286 18.47917 18.47917 [33] 18.47917 18.47917 18.47917 81.63333 81.63333 31.14286 81.63333 48.71429 [41] 81.63333 81.63333 81.63333 81.63333 18.47917 31.14286 31.14286 55.60000 [49] 31.14286 81.63333 81.63333 48.71429 55.60000 81.63333 81.63333 31.14286 [57] 48.71429 81.63333 81.63333 81.63333 31.14286 55.60000 31.14286 31.14286 [65] 81.63333 81.63333 81.63333 81.63333 81.63333 81.63333 48.71429 31.14286 [73] 31.14286 18.47917 55.60000 18.47917 31.14286 31.14286 18.47917 18.47917 [81] 31.14286 55.60000 81.63333 81.63333 81.63333 81.63333 81.63333 81.63333 [89] 81.63333 81.63333 81.63333 81.63333 48.71429 31.14286 31.14286 18.47917 [97] 18.47917 31.14286 18.47917 55.60000 18.47917 18.47917 55.60000 18.47917 [105] 18.47917 18.47917 31.14286 18.47917 18.47917 31.14286 18.47917 18.47917 [113] 55.60000 18.47917 18.47917 18.47917 > > ### or > unlist(treeresponse(airct)) [1] 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 [9] 55.60000 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 [17] 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 18.47917 31.14286 [25] 55.60000 18.47917 31.14286 48.71429 48.71429 31.14286 18.47917 18.47917 [33] 18.47917 18.47917 18.47917 81.63333 81.63333 31.14286 81.63333 48.71429 [41] 81.63333 81.63333 81.63333 81.63333 18.47917 31.14286 31.14286 55.60000 [49] 31.14286 81.63333 81.63333 48.71429 55.60000 81.63333 81.63333 31.14286 [57] 48.71429 81.63333 81.63333 81.63333 31.14286 
55.60000 31.14286 31.14286 [65] 81.63333 81.63333 81.63333 81.63333 81.63333 81.63333 48.71429 31.14286 [73] 31.14286 18.47917 55.60000 18.47917 31.14286 31.14286 18.47917 18.47917 [81] 31.14286 55.60000 81.63333 81.63333 81.63333 81.63333 81.63333 81.63333 [89] 81.63333 81.63333 81.63333 81.63333 48.71429 31.14286 31.14286 18.47917 [97] 18.47917 31.14286 18.47917 55.60000 18.47917 18.47917 55.60000 18.47917 [105] 18.47917 18.47917 31.14286 18.47917 18.47917 31.14286 18.47917 18.47917 [113] 55.60000 18.47917 18.47917 18.47917 > > ### don't use the mean but the median as prediction in each terminal node > pmedian <- sapply(weights(airct), function(w) + median(airq$Ozone[rep(1:nrow(airq), w)])) > > plot(airq$Ozone, pmean, col = "red") > points(airq$Ozone, pmedian, col = "blue") > > > > cleanEx() > nameEx("RandomForest-class") > ### * RandomForest-class > > flush(stderr()); flush(stdout()) > > ### Name: RandomForest-class > ### Title: Class "RandomForest" > ### Aliases: RandomForest-class treeresponse,RandomForest-method > ### weights,RandomForest-method where,RandomForest-method > ### show,RandomForest-method > ### Keywords: classes > > ### ** Examples > > > set.seed(290875) > > ### honest (i.e., out-of-bag) cross-classification of > ### true vs. predicted classes > data("mammoexp", package = "TH.data") > table(mammoexp$ME, predict(cforest(ME ~ ., data = mammoexp, + control = cforest_unbiased(ntree = 50)), + OOB = TRUE)) Never Within a Year Over a Year Never 189 29 16 Within a Year 58 43 3 Over a Year 56 18 0 > > > > cleanEx() > nameEx("Transformations") > ### * Transformations > > flush(stderr()); flush(stdout()) > > ### Name: Transformations > ### Title: Function for Data Transformations > ### Aliases: ptrafo ff_trafo > ### Keywords: manip > > ### ** Examples > > > ### rank a variable > ptrafo(data.frame(y = 1:20), + numeric_trafo = function(x) rank(x, na.last = "keep")) [1,] 1 [2,] 2 [3,] 3 [4,] 4 [5,] 5 [6,] 6 [7,] 7 [8,] 8 [9,] 9 [10,] 10 [11,] 11 [12,] 12 [13,] 13 [14,] 14 [15,] 15 [16,] 16 [17,] 17 [18,] 18 [19,] 19 [20,] 20 attr(,"assign") [1] 1 > > ### dummy coding of a factor > ptrafo(data.frame(y = gl(3, 9))) 1 2 3 1 1 0 0 2 1 0 0 3 1 0 0 4 1 0 0 5 1 0 0 6 1 0 0 7 1 0 0 8 1 0 0 9 1 0 0 10 0 1 0 11 0 1 0 12 0 1 0 13 0 1 0 14 0 1 0 15 0 1 0 16 0 1 0 17 0 1 0 18 0 1 0 19 0 0 1 20 0 0 1 21 0 0 1 22 0 0 1 23 0 0 1 24 0 0 1 25 0 0 1 26 0 0 1 27 0 0 1 attr(,"assign") [1] 1 1 1 > > > > > cleanEx() > nameEx("cforest") > ### * cforest > > flush(stderr()); flush(stdout()) > > ### Name: cforest > ### Title: Random Forest > ### Aliases: cforest proximity > ### Keywords: tree > > ### ** Examples > > > set.seed(290875) > > ### honest (i.e., out-of-bag) cross-classification of > ### true vs. predicted classes > data("mammoexp", package = "TH.data") > table(mammoexp$ME, predict(cforest(ME ~ ., data = mammoexp, + control = cforest_unbiased(ntree = 50)), + OOB = TRUE)) Never Within a Year Over a Year Never 189 29 16 Within a Year 58 43 3 Over a Year 56 18 0 > > ### fit forest to censored response > if (require("TH.data") && require("survival")) { + + data("GBSG2", package = "TH.data") + bst <- cforest(Surv(time, cens) ~ ., data = GBSG2, + control = cforest_unbiased(ntree = 50)) + + ### estimate conditional Kaplan-Meier curves + treeresponse(bst, newdata = GBSG2[1:2,], OOB = TRUE) + + ### if you can't resist to look at individual trees ... 
+ party:::prettytree(bst@ensemble[[1]], names(bst@data@get("input"))) + } Loading required package: TH.data Loading required package: survival Loading required package: MASS Attaching package: ‘TH.data’ The following object is masked from ‘package:MASS’: geyser 1) pnodes <= 3; criterion = 1, statistic = 37.638 2) horTh == {}; criterion = 0.986, statistic = 6.053 3) pnodes <= 2; criterion = 0.905, statistic = 2.788 4) progrec <= 16; criterion = 0.761, statistic = 1.384 5)* weights = 0 4) progrec > 16 6) pnodes <= 1; criterion = 0.857, statistic = 2.149 7) progrec <= 154; criterion = 0.295, statistic = 0.143 8)* weights = 0 7) progrec > 154 9)* weights = 0 6) pnodes > 1 10)* weights = 0 3) pnodes > 2 11) age <= 54; criterion = 0.99, statistic = 6.605 12)* weights = 0 11) age > 54 13)* weights = 0 2) horTh == {} 14) menostat == {}; criterion = 0.895, statistic = 2.635 15) tsize <= 19; criterion = 0.541, statistic = 0.548 16) age <= 45; criterion = 0.979, statistic = 5.301 17)* weights = 0 16) age > 45 18)* weights = 0 15) tsize > 19 19) age <= 37; criterion = 0.943, statistic = 3.631 20)* weights = 0 19) age > 37 21) pnodes <= 2; criterion = 0.951, statistic = 3.866 22) age <= 49; criterion = 0.913, statistic = 2.922 23) tsize <= 23; criterion = 0.606, statistic = 0.728 24)* weights = 0 23) tsize > 23 25)* weights = 0 22) age > 49 26)* weights = 0 21) pnodes > 2 27)* weights = 0 14) menostat == {} 28) tgrade <= 1; criterion = 0.58, statistic = 0.65 29)* weights = 0 28) tgrade > 1 30) progrec <= 206; criterion = 0.874, statistic = 2.337 31) tsize <= 30; criterion = 0.847, statistic = 2.04 32) tgrade <= 2; criterion = 0.788, statistic = 1.558 33) pnodes <= 1; criterion = 0.141, statistic = 0.032 34)* weights = 0 33) pnodes > 1 35) tsize <= 23; criterion = 0.756, statistic = 1.356 36)* weights = 0 35) tsize > 23 37)* weights = 0 32) tgrade > 2 38)* weights = 0 31) tsize > 30 39)* weights = 0 30) progrec > 206 40)* weights = 0 1) pnodes > 3 41) horTh == {}; criterion = 0.981, statistic = 5.458 42) pnodes <= 13; criterion = 0.982, statistic = 5.549 43) progrec <= 19; criterion = 0.918, statistic = 3.019 44) tgrade <= 2; criterion = 0.887, statistic = 2.518 45)* weights = 0 44) tgrade > 2 46)* weights = 0 43) progrec > 19 47) menostat == {}; criterion = 0.977, statistic = 5.147 48)* weights = 0 47) menostat == {} 49) pnodes <= 6; criterion = 0.6, statistic = 3.518 50)* weights = 0 49) pnodes > 6 51)* weights = 0 42) pnodes > 13 52)* weights = 0 41) horTh == {} 53) estrec <= 79; criterion = 0.997, statistic = 8.922 54) progrec <= 132; criterion = 0.981, statistic = 5.529 55) estrec <= 38; criterion = 0.484, statistic = 0.422 56) age <= 59; criterion = 0.943, statistic = 3.615 57) tsize <= 20; criterion = 0.473, statistic = 0.399 58)* weights = 0 57) tsize > 20 59) progrec <= 0; criterion = 0.552, statistic = 0.576 60)* weights = 0 59) progrec > 0 61) estrec <= 2; criterion = 0.481, statistic = 0.416 62)* weights = 0 61) estrec > 2 63) progrec <= 20; criterion = 0.637, statistic = 1.917 64)* weights = 0 63) progrec > 20 65)* weights = 0 56) age > 59 66)* weights = 0 55) estrec > 38 67)* weights = 0 54) progrec > 132 68)* weights = 0 53) estrec > 79 69) tsize <= 21; criterion = 0.641, statistic = 0.875 70)* weights = 0 69) tsize > 21 71)* weights = 0 > > ### proximity, see ?randomForest > iris.cf <- cforest(Species ~ ., data = iris, + control = cforest_unbiased(mtry = 2)) > iris.mds <- cmdscale(1 - proximity(iris.cf), eig = TRUE) > op <- par(pty="s") > pairs(cbind(iris[,1:4], iris.mds$points), cex = 
0.6, gap = 0, + col = c("red", "green", "blue")[as.numeric(iris$Species)], + main = "Iris Data: Predictors and MDS of Proximity Based on cforest") > par(op) > > > > > graphics::par(get("par.postscript", pos = 'CheckExEnv')) > cleanEx() detaching ‘package:TH.data’, ‘package:MASS’, ‘package:survival’ > nameEx("ctree") > ### * ctree > > flush(stderr()); flush(stdout()) > > ### Name: Conditional Inference Trees > ### Title: Conditional Inference Trees > ### Aliases: ctree conditionalTree > ### Keywords: tree > > ### ** Examples > > > set.seed(290875) > > ### regression > airq <- subset(airquality, !is.na(Ozone)) > airct <- ctree(Ozone ~ ., data = airq, + controls = ctree_control(maxsurrogate = 3)) > airct Conditional inference tree with 5 terminal nodes Response: Ozone Inputs: Solar.R, Wind, Temp, Month, Day Number of observations: 116 1) Temp <= 82; criterion = 1, statistic = 56.086 2) Wind <= 6.9; criterion = 0.998, statistic = 12.969 3)* weights = 10 2) Wind > 6.9 4) Temp <= 77; criterion = 0.997, statistic = 11.599 5)* weights = 48 4) Temp > 77 6)* weights = 21 1) Temp > 82 7) Wind <= 10.3; criterion = 0.997, statistic = 11.712 8)* weights = 30 7) Wind > 10.3 9)* weights = 7 > plot(airct) > mean((airq$Ozone - predict(airct))^2) [1] 403.6668 > ### extract terminal node ID, two ways > all.equal(predict(airct, type = "node"), where(airct)) [1] TRUE > > ### classification > irisct <- ctree(Species ~ .,data = iris) > irisct Conditional inference tree with 4 terminal nodes Response: Species Inputs: Sepal.Length, Sepal.Width, Petal.Length, Petal.Width Number of observations: 150 1) Petal.Length <= 1.9; criterion = 1, statistic = 140.264 2)* weights = 50 1) Petal.Length > 1.9 3) Petal.Width <= 1.7; criterion = 1, statistic = 67.894 4) Petal.Length <= 4.8; criterion = 0.999, statistic = 13.865 5)* weights = 46 4) Petal.Length > 4.8 6)* weights = 8 3) Petal.Width > 1.7 7)* weights = 46 > plot(irisct) > table(predict(irisct), iris$Species) setosa versicolor virginica setosa 50 0 0 versicolor 0 49 5 virginica 0 1 45 > > ### estimated class probabilities, a list > tr <- treeresponse(irisct, newdata = iris[1:10,]) > > ### ordinal regression > data("mammoexp", package = "TH.data") > mammoct <- ctree(ME ~ ., data = mammoexp) > plot(mammoct) > > ### estimated class probabilities > treeresponse(mammoct, newdata = mammoexp[1:10,]) [[1]] [1] 0.3990385 0.3798077 0.2211538 [[2]] [1] 0.84070796 0.05309735 0.10619469 [[3]] [1] 0.3990385 0.3798077 0.2211538 [[4]] [1] 0.6153846 0.2087912 0.1758242 [[5]] [1] 0.3990385 0.3798077 0.2211538 [[6]] [1] 0.3990385 0.3798077 0.2211538 [[7]] [1] 0.3990385 0.3798077 0.2211538 [[8]] [1] 0.3990385 0.3798077 0.2211538 [[9]] [1] 0.84070796 0.05309735 0.10619469 [[10]] [1] 0.3990385 0.3798077 0.2211538 > > ### survival analysis > if (require("TH.data") && require("survival")) { + data("GBSG2", package = "TH.data") + GBSG2ct <- ctree(Surv(time, cens) ~ .,data = GBSG2) + plot(GBSG2ct) + treeresponse(GBSG2ct, newdata = GBSG2[1:2,]) + } Loading required package: TH.data Loading required package: survival Loading required package: MASS Attaching package: ‘TH.data’ The following object is masked from ‘package:MASS’: geyser [[1]] Call: survfit(formula = y ~ 1, weights = weights) n events median 0.95LCL 0.95UCL [1,] 248 88 2093 1814 NA [[2]] Call: survfit(formula = y ~ 1, weights = weights) n events median 0.95LCL 0.95UCL [1,] 166 77 1701 1174 2018 > > ### if you are interested in the internals: > ### generate doxygen documentation > ## Not run: > ##D > ##D ### download src package into 
temp dir > ##D tmpdir <- tempdir() > ##D tgz <- download.packages("party", destdir = tmpdir)[2] > ##D ### extract > ##D untar(tgz, exdir = tmpdir) > ##D wd <- setwd(file.path(tmpdir, "party")) > ##D ### run doxygen (assuming it is there) > ##D system("doxygen inst/doxygen.cfg") > ##D setwd(wd) > ##D ### have fun > ##D browseURL(file.path(tmpdir, "party", "inst", > ##D "documentation", "html", "index.html")) > ##D > ## End(Not run) > > > > cleanEx() detaching ‘package:TH.data’, ‘package:MASS’, ‘package:survival’ > nameEx("mob") > ### * mob > > flush(stderr()); flush(stdout()) > > ### Name: mob > ### Title: Model-based Recursive Partitioning > ### Aliases: mob mob-class coef.mob deviance.mob fitted.mob logLik.mob > ### predict.mob print.mob residuals.mob sctest.mob summary.mob > ### weights.mob > ### Keywords: tree > > ### ** Examples > > > set.seed(290875) > > if(require("mlbench")) { + + ## recursive partitioning of a linear regression model + ## load data + data("BostonHousing", package = "mlbench") + ## and transform variables appropriately (for a linear regression) + BostonHousing$lstat <- log(BostonHousing$lstat) + BostonHousing$rm <- BostonHousing$rm^2 + ## as well as partitioning variables (for fluctuation testing) + BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1, + labels = c("no", "yes")) + BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE) + + ## partition the linear regression model medv ~ lstat + rm + ## with respect to all remaining variables: + fmBH <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age + + dis + rad + tax + crim + b + ptratio, + control = mob_control(minsplit = 40), data = BostonHousing, + model = linearModel) + + ## print the resulting tree + fmBH + ## or better visualize it + plot(fmBH) + + ## extract coefficients in all terminal nodes + coef(fmBH) + ## look at full summary, e.g., for node 7 + summary(fmBH, node = 7) + ## results of parameter stability tests for that node + sctest(fmBH, node = 7) + ## -> no further significant instabilities (at 5% level) + + ## compute mean squared error (on training data) + mean((BostonHousing$medv - fitted(fmBH))^2) + mean(residuals(fmBH)^2) + deviance(fmBH)/sum(weights(fmBH)) + + ## evaluate logLik and AIC + logLik(fmBH) + AIC(fmBH) + ## (Note that this penalizes estimation of error variances, which + ## were treated as nuisance parameters in the fitting process.) 
+ 
+ 
+   ## recursive partitioning of a logistic regression model
+   ## load data
+   data("PimaIndiansDiabetes", package = "mlbench")
+   ## partition logistic regression diabetes ~ glucose
+   ## with respect to all remaining variables
+   fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps +
+     insulin + mass + pedigree + age,
+     data = PimaIndiansDiabetes, model = glinearModel,
+     family = binomial())
+ 
+   ## fitted model
+   coef(fmPID)
+   plot(fmPID)
+   plot(fmPID, tp_args = list(cdplot = TRUE))
+ }
Loading required package: mlbench
> 
> 
> 
> cleanEx()

detaching ‘package:mlbench’

> nameEx("panelfunctions")
> ### * panelfunctions
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Panel Generating Functions
> ### Title: Panel-Generators for Visualization of Party Trees
> ### Aliases: node_inner node_terminal edge_simple node_surv node_barplot
> ###   node_boxplot node_hist node_density node_scatterplot node_bivplot
> ### Keywords: hplot
> 
> ### ** Examples
> 
> 
> set.seed(290875)
> 
> airq <- subset(airquality, !is.na(Ozone))
> airct <- ctree(Ozone ~ ., data = airq)
> 
> ## default: boxplots
> plot(airct)
> 
> ## change colors
> plot(airct, tp_args = list(col = "blue", fill = hsv(2/3, 0.5, 1)))
> ## equivalent to
> plot(airct, terminal_panel = node_boxplot(airct, col = "blue",
+      fill = hsv(2/3, 0.5, 1)))
> 
> ### very simple; the mean is given in each terminal node
> plot(airct, type = "simple")
> 
> ### density estimates
> plot(airct, terminal_panel = node_density)
> 
> ### histograms
> plot(airct, terminal_panel = node_hist(airct, ymax = 0.06,
+      xscale = c(0, 250)))
> 
> 
> 
> cleanEx()
> nameEx("plot.BinaryTree")
> ### * plot.BinaryTree
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: Plot BinaryTree
> ### Title: Visualization of Binary Regression Trees
> ### Aliases: plot.BinaryTree
> ### Keywords: hplot
> 
> ### ** Examples
> 
> 
> set.seed(290875)
> 
> airq <- subset(airquality, !is.na(Ozone))
> airct <- ctree(Ozone ~ ., data = airq)
> 
> ### regression: boxplots in each node
> plot(airct, terminal_panel = node_boxplot, drop_terminal = TRUE)
> 
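> ## in the classification example below, 'edge_panel' is given a
> ## function that draws nothing, which suppresses the split labels on
> ## the edges ('tnex' controls the relative size of the terminal
> ## panels); an analogous sketch (hypothetical, not run) would blank
> ## the inner nodes instead:
> ## plot(glauct, inner_panel = function(ctreeobj, ...) function(...) invisible())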
> if(require("TH.data")) {
+   ## classification: barplots in each node
+   data("GlaucomaM", package = "TH.data")
+   glauct <- ctree(Class ~ ., data = GlaucomaM)
+   plot(glauct)
+   plot(glauct, inner_panel = node_barplot,
+     edge_panel = function(ctreeobj, ...) { function(...) invisible() },
+     tnex = 1)
+ 
+   ## survival: Kaplan-Meier curves in each node
+   data("GBSG2", package = "TH.data")
+   library("survival")
+   gbsg2ct <- ctree(Surv(time, cens) ~ ., data = GBSG2)
+   plot(gbsg2ct)
+   plot(gbsg2ct, type = "simple")
+ }
Loading required package: TH.data
Loading required package: survival
Loading required package: MASS

Attaching package: ‘TH.data’

The following object is masked from ‘package:MASS’:

    geyser

> 
> 
> 
> 
> cleanEx()

detaching ‘package:TH.data’, ‘package:MASS’, ‘package:survival’

> nameEx("plot.mob")
> ### * plot.mob
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: plot.mob
> ### Title: Visualization of MOB Trees
> ### Aliases: plot.mob
> ### Keywords: hplot
> 
> ### ** Examples
> 
> 
> set.seed(290875)
> 
> if(require("mlbench")) {
+ 
+   ## recursive partitioning of a linear regression model
+   ## load data
+   data("BostonHousing", package = "mlbench")
+   ## and transform variables appropriately (for a linear regression)
+   BostonHousing$lstat <- log(BostonHousing$lstat)
+   BostonHousing$rm <- BostonHousing$rm^2
+   ## as well as partitioning variables (for fluctuation testing)
+   BostonHousing$chas <- factor(BostonHousing$chas, levels = 0:1,
+     labels = c("no", "yes"))
+   BostonHousing$rad <- factor(BostonHousing$rad, ordered = TRUE)
+ 
+   ## partition the linear regression model medv ~ lstat + rm
+   ## with respect to all remaining variables:
+   fm <- mob(medv ~ lstat + rm | zn + indus + chas + nox + age + dis +
+     rad + tax + crim + b + ptratio,
+     control = mob_control(minsplit = 40), data = BostonHousing,
+     model = linearModel)
+ 
+   ## visualize medv ~ lstat and medv ~ rm
+   plot(fm)
+ 
+   ## visualize only one of the two regressors
+   plot(fm, tp_args = list(which = "lstat"), tnex = 2)
+   plot(fm, tp_args = list(which = 2), tnex = 2)
+ 
+   ## omit fitted mean lines
+   plot(fm, tp_args = list(fitmean = FALSE))
+ 
+   ## mixed numerical and categorical regressors
+   fm2 <- mob(medv ~ lstat + rm + chas | zn + indus + nox + age +
+     dis + rad,
+     control = mob_control(minsplit = 100), data = BostonHousing,
+     model = linearModel)
+   plot(fm2)
+ 
+   ## recursive partitioning of a logistic regression model
+   data("PimaIndiansDiabetes", package = "mlbench")
+   fmPID <- mob(diabetes ~ glucose | pregnant + pressure + triceps +
+     insulin + mass + pedigree + age,
+     data = PimaIndiansDiabetes, model = glinearModel,
+     family = binomial())
+   ## default plot: spinograms with breaks from five-point summary
+   plot(fmPID)
+   ## use the breaks from hist() instead
+   plot(fmPID, tp_args = list(fivenum = FALSE))
+   ## user-defined breaks
+   plot(fmPID, tp_args = list(breaks = 0:4 * 50))
+   ## CD plots instead of spinograms
+   plot(fmPID, tp_args = list(cdplot = TRUE))
+   ## different smoothing bandwidth
+   plot(fmPID, tp_args = list(cdplot = TRUE, bw = 15))
+ 
+ }
Loading required package: mlbench
> 
> 
> 
> cleanEx()

detaching ‘package:mlbench’

> nameEx("readingSkills")
> ### * readingSkills
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: readingSkills
> ### Title: Reading Skills
> ### Aliases: readingSkills
> ### Keywords: datasets
> 
> ### ** Examples
> 
> 
> set.seed(290875)
> readingSkills.cf <- cforest(score ~ ., data = readingSkills,
+   control = cforest_unbiased(mtry = 2, ntree = 50))
> 
> # standard importance
> varimp(readingSkills.cf)
nativeSpeaker           age      shoeSize 
     12.69213      82.26737      13.60017 
> # the same modulo random variation
> varimp(readingSkills.cf, pre1.0_0 = TRUE)
nativeSpeaker           age      shoeSize 
     12.88414      79.09714      15.37933 
> 
> # conditional importance, may take a while...
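> ## conditional = TRUE permutes each input within strata defined by the
> ## covariates associated with it (Strobl et al., 2008, BMC
> ## Bioinformatics), so the importance of shoeSize, which is informative
> ## only through its correlation with age, drops markedly below; the
> ## strength of association used for conditioning can be tuned via the
> ## 'threshold' argument (hypothetical value, sketch not run):
> ## varimp(readingSkills.cf, conditional = TRUE, threshold = 0.8)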
> varimp(readingSkills.cf, conditional = TRUE)
nativeSpeaker           age      shoeSize 
    11.466498     51.125596      1.521413 
> 
> 
> 
> 
> cleanEx()
> nameEx("reweight")
> ### * reweight
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: reweight
> ### Title: Re-fitting Models with New Weights
> ### Aliases: reweight reweight.linearModel reweight.glinearModel
> ### Keywords: regression
> 
> ### ** Examples
> 
> ## fit cars regression
> mf <- dpp(linearModel, dist ~ speed, data = cars)
> fm <- fit(linearModel, mf)
> fm
Linear model with coefficients:
(Intercept)        speed  
    -17.579        3.932  

> 
> ## re-fit, excluding the last 4 observations
> ww <- c(rep(1, 46), rep(0, 4))
> reweight(fm, ww)
Linear model with coefficients:
(Intercept)        speed  
     -8.723        3.210  

> 
> 
> 
> cleanEx()
> nameEx("varimp")
> ### * varimp
> 
> flush(stderr()); flush(stdout())
> 
> ### Name: varimp
> ### Title: Variable Importance
> ### Aliases: varimp varimpAUC
> ### Keywords: tree
> 
> ### ** Examples
> 
> 
> set.seed(290875)
> readingSkills.cf <- cforest(score ~ ., data = readingSkills,
+   control = cforest_unbiased(mtry = 2, ntree = 50))
> 
> # standard importance
> varimp(readingSkills.cf)
nativeSpeaker           age      shoeSize 
     12.69213      82.26737      13.60017 
> # the same modulo random variation
> varimp(readingSkills.cf, pre1.0_0 = TRUE)
nativeSpeaker           age      shoeSize 
     12.88414      79.09714      15.37933 
> 
> # conditional importance, may take a while...
> varimp(readingSkills.cf, conditional = TRUE)
nativeSpeaker           age      shoeSize 
    11.466498     51.125596      1.521413 
> 
> ## Not run: 
> ##D data("GBSG2", package = "TH.data")
> ##D ### add a random covariate for sanity check
> ##D set.seed(29)
> ##D GBSG2$rand <- runif(nrow(GBSG2))
> ##D object <- cforest(Surv(time, cens) ~ ., data = GBSG2,
> ##D   control = cforest_unbiased(ntree = 20))
> ##D vi <- varimp(object)
> ##D ### compare variable importances and absolute z-statistics
> ##D layout(matrix(1:2))
> ##D barplot(vi)
> ##D barplot(abs(summary(coxph(Surv(time, cens) ~ ., data = GBSG2))$coeff[,"z"]))
> ##D ### looks more or less the same
> ##D 
> ## End(Not run)
> 
> 
> 
> ### *