vioplot/0000755000176200001440000000000014344672512011756 5ustar liggesusersvioplot/NAMESPACE0000755000176200001440000000177614344507152013210 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(histoplot,data.frame) S3method(histoplot,default) S3method(histoplot,formula) S3method(histoplot,list) S3method(histoplot,matrix) S3method(vioplot,data.frame) S3method(vioplot,default) S3method(vioplot,formula) S3method(vioplot,list) S3method(vioplot,matrix) S3method(vioplot,stats) export(histoplot) export(vioplot) import(sm) importFrom(grDevices,boxplot.stats) importFrom(grDevices,dev.flush) importFrom(grDevices,dev.hold) importFrom(grDevices,dev.interactive) importFrom(grDevices,devAskNewPage) importFrom(grDevices,xy.coords) importFrom(graphics,Axis) importFrom(graphics,axis) importFrom(graphics,box) importFrom(graphics,lines) importFrom(graphics,par) importFrom(graphics,plot.new) importFrom(graphics,plot.window) importFrom(graphics,plot.xy) importFrom(graphics,points) importFrom(graphics,polygon) importFrom(graphics,rect) importFrom(graphics,title) importFrom(stats,median) importFrom(stats,na.omit) importFrom(stats,quantile) importFrom(zoo,rollmean) vioplot/LICENSE0000644000176200001440000000012113507646542012761 0ustar liggesusersYEAR: 2004 COPYRIGHT HOLDER: Daniel Adler ORGANIZATION: University of Goettingen vioplot/man/0000755000176200001440000000000014344404314012522 5ustar liggesusersvioplot/man/violin.stats.Rd0000644000176200001440000000220114344404314015441 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vioplot.stats.R \name{vioplot.stats} \alias{vioplot.stats} \alias{violin.stats} \alias{violinplot.stats} \title{Violin Plot Statistics} \usage{ \method{vioplot}{stats}(x, coef = 1.5, do.conf = TRUE, do.out = TRUE, ...) 
} \arguments{ \item{x}{a numeric vector for which the violin plot will be constructed (\code{NA}s and \code{NaN}s are allowed and omitted).} \item{coef}{this determines how far the plot ‘whiskers’ extend out from the box. If coef is positive, the whiskers extend to the most extreme data point which is no more than coef times the length of the box away from the box. A value of zero causes the whiskers to extend to the data extremes (and no outliers be returned).} \item{do.conf, do.out}{logicals; if FALSE, the conf or out component respectively will be empty in the result.} \item{...}{arguments passed to \code{\link[vioplot]{vioplot}}.} } \description{ This function is typically called by another function to gather the statistics necessary for producing box plots, but may be invoked separately. See: \code{\link[grDevices]{boxplot.stats}} } vioplot/man/vioplot.Rd0000755000176200001440000003065614344404314014522 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/vioplot.R \name{vioplot} \alias{vioplot} \alias{violinplot} \alias{vioplot.matrix} \alias{violin.matrix} \alias{violinplot.matrix} \alias{vioplot.list} \alias{vioplot.data.frame} \alias{vioplot.formula} \alias{vioplot.default} \title{Violin Plot} \usage{ \method{vioplot}{matrix}(x, use.cols = TRUE, ...) \method{vioplot}{list}(x, ...) \method{vioplot}{data.frame}(x, ...) \method{vioplot}{matrix}(x, use.cols = TRUE, ...) 
\method{vioplot}{formula}( formula, data = NULL, ..., subset, na.action = NULL, add = FALSE, ann = !add, horizontal = FALSE, side = "both", xlab = mklab(y_var = horizontal), ylab = mklab(y_var = !horizontal), names = NULL, drop = FALSE, sep = ".", lex.order = FALSE ) \method{vioplot}{default}( x, ..., data = NULL, range = 1.5, h = NULL, xlim = NULL, ylim = NULL, names = NULL, horizontal = FALSE, col = "grey50", border = par()$fg, lty = 1, lwd = 1, rectCol = par()$fg, lineCol = par()$fg, pchMed = 19, colMed = "white", colMed2 = "grey 75", at, add = FALSE, wex = 1, drawRect = TRUE, areaEqual = FALSE, axes = TRUE, frame.plot = axes, panel.first = NULL, panel.last = NULL, asp = NA, main = "", sub = "", xlab = NA, ylab = NA, line = NA, outer = FALSE, xlog = NA, ylog = NA, adj = NA, ann = NA, ask = NA, bg = NA, bty = NA, cex = NA, cex.axis = NA, cex.lab = NA, cex.main = NA, cex.names = NULL, cex.sub = NA, cin = NA, col.axis = NA, col.lab = NA, col.main = NA, col.sub = NA, cra = NA, crt = NA, csi = NA, cxy = NA, din = NA, err = NA, family = NA, fg = NA, fig = NA, fin = NA, font = NA, font.axis = NA, font.lab = NA, font.main = NA, font.sub = NA, lab = NA, las = NA, lend = NA, lheight = NA, ljoin = NA, lmitre = NA, mai = NA, mar = NA, mex = NA, mfcol = NA, mfg = NA, mfrow = NA, mgp = NA, mkh = NA, new = NA, oma = NA, omd = NA, omi = NA, page = NA, pch = NA, pin = NA, plt = NA, ps = NA, pty = NA, smo = NA, srt = NA, tck = NA, tcl = NA, usr = NA, xaxp = NA, xaxs = NA, xaxt = NA, xpd = NA, yaxp = NA, yaxs = NA, yaxt = NA, ylbias = NA, log = "", logLab = c(1, 2, 5), na.action = NULL, na.rm = T, side = "both", plotCentre = "point" ) } \arguments{ \item{x}{a numeric matrix.} \item{...}{Further arguments to \code{\link[vioplot]{vioplot}}.} \item{use.cols}{logical indicating if columns (by default) or rows (use.cols = FALSE) should be plotted.} \item{formula}{a formula, such as y ~ grp, where y is a numeric vector of data values to be split into groups according to the grouping 
variable grp (usually a factor).} \item{data}{a data.frame (or list) from which the variables in formula should be taken.} \item{subset}{an optional vector specifying a subset of observations to be used for plotting.} \item{na.action}{a function which indicates what should happen when the data contain NAs. The default is to ignore missing values in either the response or the group.} \item{add}{logical. If FALSE (default), a new plot is created.} \item{horizontal}{logical. To use horizontal or vertical violins. Note that log scale can only be used on the x-axis for horizontal violins, and on the y-axis otherwise.} \item{side}{defaults to "both". Assigning "left" or "right" enables one sided plotting of violins. May be applied as a scalar across all groups.} \item{names}{one label, or a vector of labels for the data; must match the number of data given} \item{drop, sep, lex.order}{defines groups to plot from formula, passed to \code{split.default}, see there.} \item{range}{a factor to calculate the upper/lower adjacent values} \item{h}{the height for the density estimator; if omitted, as explained in sm.density, h will be set to an optimum. A vector of length one, two or three, defining the smoothing parameter. A normal kernel function is used and h is its standard deviation. If this parameter is omitted, a normal optimal smoothing parameter is used.} \item{xlim, ylim}{numeric vectors of length 2, giving the x and y coordinate ranges.} \item{col}{Graphical parameter for fill colour of the violin(s) polygon. NA for no fill colour. If col is a vector, it specifies the colour per violin, and colours are reused if necessary.} \item{border}{Graphical parameters for the colour of the violin border passed to lines. NA for no border. If border is a vector, it specifies the colour per violin, and colours are reused if necessary.} \item{lty, lwd}{Graphical parameters for the violin passed to lines and polygon} \item{rectCol}{Graphical parameters to control fill colour of the box. 
NA for no fill colour. If rectCol is a vector, it specifies the colour per violin, and colours are reused if necessary.} \item{lineCol}{Graphical parameters to control colour of the box outline and whiskers. NA for no border. If lineCol is a vector, it specifies the colour per violin, and colours are reused if necessary.} \item{pchMed}{Graphical parameters to control shape of the median point. If pchMed is a vector, it specifies the shape per violin.} \item{colMed, colMed2}{Graphical parameters to control colour of the median point. If colMed is a vector, it specifies the colour per violin. colMed specifies the fill colour in all cases unless pchMed is 21:25 in which case colMed is the border colour and colMed2 is the fill colour.} \item{at}{position of each violin. Defaults to 1:n} \item{wex}{relative expansion of the violin. If wex is a vector, it specifies the area/width size per violin and sizes are reused if necessary.} \item{drawRect}{logical. The box is drawn if TRUE.} \item{areaEqual}{logical. Density plots checked for equal area if TRUE. wex must be scalar, relative widths of violins depend on area.} \item{axes, frame.plot, panel.first, panel.last, asp, line, outer, adj, ann, ask, bg, bty, cin, col.axis, col.lab, col.main, col.sub, cra, crt, csi, cxy, din, err, family, fg, fig, fin, font, font.axis, font.lab, font.main, font.sub, lab, las, lend, lheight, ljoin, lmitre, mai, mar, mex, mfcol, mfg, mfrow, mgp, mkh, new, oma, omd, omi, page, pch, pin, plt, ps, pty, smo, srt, tck, tcl, usr, xaxp, xaxs, xaxt, xpd, yaxp, yaxs, ylbias}{Arguments to be passed to methods, such as graphical parameters (see \code{\link[graphics]{par}}).} \item{main, sub, xlab, ylab}{graphical parameters passed to plot.} \item{ylog, xlog}{A logical value (see log in \code{\link[graphics]{plot.default}}). If ylog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "y")). 
For horizontal = TRUE then, if xlog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "x")). For a new device, it defaults to FALSE, i.e., linear scale.} \item{cex}{A numerical value giving the amount by which plotting text should be magnified relative to the default.} \item{cex.axis}{The magnification to be used for y axis annotation relative to the current setting of cex.} \item{cex.lab}{The magnification to be used for x and y labels relative to the current setting of cex.} \item{cex.main}{The magnification to be used for main titles relative to the current setting of cex.} \item{cex.names}{The magnification to be used for x axis annotation relative to the current setting of cex. Takes the value of cex.axis if not given.} \item{cex.sub}{The magnification to be used for sub-titles relative to the current setting of cex.} \item{yaxt}{A character which specifies the y axis type. Specifying "n" suppresses plotting.} \item{log}{Logarithmic scale if log = "y" or TRUE. Invokes ylog = TRUE. If horizontal is TRUE then invokes xlog = TRUE.} \item{logLab}{Increments for labelling y-axis on log-scale, defaults to numbers starting with 1, 2, 5, and 10.} \item{na.rm}{logical value indicating whether NA values should be stripped before the computation proceeds. Defaults to TRUE.} \item{plotCentre}{defaults to "point", plotting a central point at the median. If "line" is given, a median line is plotted instead (subject to side).} } \description{ Produce violin plot(s) of the given (grouped) values with enhanced annotation and colour per group. Includes customisation of colours for each aspect of the violin, boxplot, and separate violins. This supports input of data as a list or formula, being backwards compatible with \code{\link[vioplot]{vioplot}} (0.2) and taking input in a formula as used for \code{\link[graphics]{boxplot}}. Interpreting the columns (or rows) of a matrix as different groups, draw a violin plot for each. 
} \examples{ # box- vs violin-plot par(mfrow=c(2,1)) mu<-2 si<-0.6 bimodal<-c(rnorm(1000,-mu,si),rnorm(1000,mu,si)) uniform<-runif(2000,-4,4) normal<-rnorm(2000,0,3) vioplot(bimodal,uniform,normal) boxplot(bimodal,uniform,normal) # add to an existing plot x <- rnorm(100) y <- rnorm(100) plot(x, y, xlim=c(-5,5), ylim=c(-5,5)) vioplot(x, col="tomato", horizontal=TRUE, at=-4, add=TRUE,lty=2, rectCol="gray") vioplot(y, col="cyan", horizontal=FALSE, at=-4, add=TRUE,lty=2) # formula input data("iris") vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) data("diamonds", package = "ggplot2") palette <- RColorBrewer::brewer.pal(9, "Pastel1") par(mfrow=c(3, 1)) vioplot(price ~ cut, data = diamonds, las = 1, col = palette) vioplot(price ~ clarity, data = diamonds, las = 2, col = palette) vioplot(price ~ color, data = diamonds, las = 2, col = palette) par(mfrow=c(3, 1)) #generate example data data_one <- rnorm(100) data_two <- rnorm(50, 1, 2) #generate violin plot with similar functionality to vioplot vioplot(data_one, data_two, col="magenta") #note vioplot defaults to a greyscale plot vioplot(data_one, data_two) #colours can be customised separately, with axis labels, legends, and titles vioplot(data_one, data_two, col=c("red","blue"), names=c("data one", "data two"), main="data violin", xlab="data class", ylab="data read") legend("topleft", fill=c("red","blue"), legend=c("data one", "data two")) #colours can be customised for the violin fill and border separately vioplot(data_one, data_two, col="grey85", border="purple", names=c("data one", "data two"), main="data violin", xlab="data class", ylab="data read") #colours can also be customised for the boxplot rectangle and lines (border and whiskers) vioplot(data_one, data_two, col="grey85", rectCol="lightblue", lineCol="blue", 
border="purple", names=c("data one", "data two"), main="data violin", xlab="data class", ylab="data read") #these colours can also be customised separately for each violin vioplot(data_one, data_two, col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"), lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"), main="data violin", xlab="data class", ylab="data read") #this applies to any number of violins, given that colours are provided for each vioplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), col=c("red", "orange", "green", "blue", "violet"), rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"), lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), names=c("data one", "data two", "data three", "data four", "data five"), main="data violin", xlab="data class", ylab="data read") #The areaEqual parameter scales with width of violins #Violins will have equal density area (including missing tails) rather than equal maximum width vioplot(data_one, data_two, areaEqual=TRUE) vioplot(data_one, data_two, areaEqual=TRUE, col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"), lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"), main="data violin", xlab="data class", ylab="data read") vioplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), areaEqual=TRUE, col=c("red", "orange", "green", "blue", "violet"), rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"), lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), names=c("data one", "data two", "data three", "data four", "data five"), main="data violin", xlab="data class", ylab="data read") } \keyword{graphics} \keyword{plot} \keyword{violin} 
vioplot/man/histoplot.Rd0000644000176200001440000003005514344404314015041 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/histoplot.R \name{histoplot} \alias{histoplot} \alias{histoplot.matrix} \alias{histogram.matrix} \alias{histoplot.list} \alias{histoplot.data.frame} \alias{histoplot.formula} \alias{histoplot.default} \title{histoplot} \usage{ \method{histoplot}{matrix}(x, use.cols = TRUE, ...) \method{histoplot}{list}(x, ...) \method{histoplot}{data.frame}(x, ...) \method{histoplot}{matrix}(x, use.cols = TRUE, ...) \method{histoplot}{formula}( formula, data = NULL, ..., subset, na.action = NULL, add = FALSE, ann = !add, horizontal = FALSE, side = "both", xlab = mklab(y_var = horizontal), ylab = mklab(y_var = !horizontal), names = NULL, drop = FALSE, sep = ".", lex.order = FALSE ) \method{histoplot}{default}( x, ..., data = NULL, breaks = "Sturges", xlim = NULL, ylim = NULL, names = NULL, horizontal = FALSE, col = "grey50", border = par()$fg, lty = 1, lwd = 1, rectCol = par()$fg, lineCol = par()$fg, pchMed = 19, colMed = "white", colMed2 = "grey 75", at, add = FALSE, wex = 1, drawRect = TRUE, areaEqual = FALSE, axes = TRUE, frame.plot = axes, panel.first = NULL, panel.last = NULL, asp = NA, main = "", sub = "", xlab = NA, ylab = NA, line = NA, outer = FALSE, xlog = NA, ylog = NA, adj = NA, ann = NA, ask = NA, bg = NA, bty = NA, cex = NA, cex.axis = NA, cex.lab = NA, cex.main = NA, cex.names = NULL, cex.sub = NA, cin = NA, col.axis = NA, col.lab = NA, col.main = NA, col.sub = NA, cra = NA, crt = NA, csi = NA, cxy = NA, din = NA, err = NA, family = NA, fg = NA, fig = NA, fin = NA, font = NA, font.axis = NA, font.lab = NA, font.main = NA, font.sub = NA, lab = NA, las = NA, lend = NA, lheight = NA, ljoin = NA, lmitre = NA, mai = NA, mar = NA, mex = NA, mfcol = NA, mfg = NA, mfrow = NA, mgp = NA, mkh = NA, new = NA, oma = NA, omd = NA, omi = NA, page = NA, pch = NA, pin = NA, plt = NA, ps = NA, pty = NA, smo = NA, srt = 
NA, tck = NA, tcl = NA, usr = NA, xaxp = NA, xaxs = NA, xaxt = NA, xpd = NA, yaxp = NA, yaxs = NA, yaxt = NA, ylbias = NA, log = "", logLab = c(1, 2, 5), na.action = NULL, na.rm = T, side = "both" ) } \arguments{ \item{x}{a numeric matrix.} \item{...}{Further arguments to \code{\link[vioplot]{histoplot}}.} \item{use.cols}{logical indicating if columns (by default) or rows (use.cols = FALSE) should be plotted.} \item{formula}{a formula, such as y ~ grp, where y is a numeric vector of data values to be split into groups according to the grouping variable grp (usually a factor).} \item{data}{a data.frame (or list) from which the variables in formula should be taken.} \item{subset}{an optional vector specifying a subset of observations to be used for plotting.} \item{na.action}{a function which indicates what should happen when the data contain NAs. The default is to ignore missing values in either the response or the group.} \item{add}{logical. if FALSE (default) a new plot is created} \item{horizontal}{logical. To use horizontal or vertical histograms. Note that log scale can only be used on the x-axis for horizontal histograms, and on the y-axis otherwise.} \item{side}{defaults to "both". Assigning "left" or "right" enables one sided plotting of histograms. May be applied as a scalar across all groups.} \item{names}{one label, or a vector of labels for the data must match the number of data given} \item{drop, sep, lex.order}{defines groups to plot from formula, passed to \code{split.default}, see there.} \item{breaks}{the breaks for the density estimator, as explained in hist} \item{xlim, ylim}{numeric vectors of length 2, giving the x and y coordinates ranges.} \item{col}{Graphical parameter for fill colour of the histogram(s) polygon. NA for no fill colour. If col is a vector, it specifies the colour per histogram, and colours are reused if necessary.} \item{border}{Graphical parameters for the colour of the histogram border passed to lines. NA for no border. 
If border is a vector, it specifies the colour per histogram, and colours are reused if necessary.} \item{lty, lwd}{Graphical parameters for the histogram passed to lines and polygon} \item{rectCol}{Graphical parameters to control fill colour of the box. NA for no fill colour. If rectCol is a vector, it specifies the colour per histogram, and colours are reused if necessary.} \item{lineCol}{Graphical parameters to control colour of the box outline and whiskers. NA for no border. If lineCol is a vector, it specifies the colour per histogram, and colours are reused if necessary.} \item{pchMed}{Graphical parameters to control shape of the median point. If pchMed is a vector, it specifies the shape per histogram.} \item{colMed, colMed2}{Graphical parameters to control colour of the median point. If colMed is a vector, it specifies the colour per histogram. colMed specifies the fill colour in all cases unless pchMed is 21:25 in which case colMed is the border colour and colMed2 is the fill colour.} \item{at}{position of each histogram. Defaults to 1:n} \item{wex}{relative expansion of the histogram. If wex is a vector, it specifies the area/width size per histogram and sizes are reused if necessary.} \item{drawRect}{logical. The box is drawn if TRUE.} \item{areaEqual}{logical. Density plots checked for equal area if TRUE. 
wex must be scalar, relative widths of histograms depend on area.} \item{axes, frame.plot, panel.first, panel.last, asp, line, outer, adj, ann, ask, bg, bty, cin, col.axis, col.lab, col.main, col.sub, cra, crt, csi, cxy, din, err, family, fg, fig, fin, font, font.axis, font.lab, font.main, font.sub, lab, las, lend, lheight, ljoin, lmitre, mai, mar, mex, mfcol, mfg, mfrow, mgp, mkh, new, oma, omd, omi, page, pch, pin, plt, ps, pty, smo, srt, tck, tcl, usr, xaxp, xaxs, xaxt, xpd, yaxp, yaxs, ylbias}{Arguments to be passed to methods, such as graphical parameters (see \code{\link[graphics]{par}}).} \item{main, sub, xlab, ylab}{graphical parameters passed to plot.} \item{ylog, xlog}{A logical value (see log in \code{\link[graphics]{plot.default}}). If ylog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "y")). For horizontal = TRUE then, if xlog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "x")). For a new device, it defaults to FALSE, i.e., linear scale.} \item{cex}{A numerical value giving the amount by which plotting text should be magnified relative to the default.} \item{cex.axis}{The magnification to be used for y axis annotation relative to the current setting of cex.} \item{cex.lab}{The magnification to be used for x and y labels relative to the current setting of cex.} \item{cex.main}{The magnification to be used for main titles relative to the current setting of cex.} \item{cex.names}{The magnification to be used for x axis annotation relative to the current setting of cex. Takes the value of cex.axis if not given.} \item{cex.sub}{The magnification to be used for sub-titles relative to the current setting of cex.} \item{yaxt}{A character which specifies the y axis type. Specifying "n" suppresses plotting.} \item{log}{Logarithmic scale if log = "y" or TRUE. Invokes ylog = TRUE. 
If horizontal is TRUE then invokes xlog = TRUE.} \item{logLab}{Increments for labelling y-axis on log-scale, defaults to numbers starting with 1, 2, 5, and 10.} \item{na.rm}{logical value indicating whether NA values should be stripped before the computation proceeds. Defaults to TRUE.} } \description{ Produce histogram plot(s) of the given (grouped) values with enhanced annotation and colour per group. Includes customisation of colours for each aspect of the histogram, boxplot, and separate histograms. This supports input of data as a list or formula, being backwards compatible with \code{\link[vioplot]{histoplot}} (0.2) and taking input in a formula as used for \code{\link[graphics]{boxplot}}. Interpreting the columns (or rows) of a matrix as different groups, draw a boxplot for each. } \examples{ # box- vs histogram-plot par(mfrow=c(2,1)) mu<-2 si<-0.6 bimodal<-c(rnorm(1000,-mu,si),rnorm(1000,mu,si)) uniform<-runif(2000,-4,4) normal<-rnorm(2000,0,3) histoplot(bimodal,uniform,normal) boxplot(bimodal,uniform,normal) # add to an existing plot x <- rnorm(100) y <- rnorm(100) plot(x, y, xlim=c(-5,5), ylim=c(-5,5)) histoplot(x, col="tomato", horizontal=TRUE, at=-4, add=TRUE,lty=2, rectCol="gray") histoplot(y, col="cyan", horizontal=FALSE, at=-4, add=TRUE,lty=2) # formula input data("iris") histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) data("diamonds", package = "ggplot2") palette <- RColorBrewer::brewer.pal(9, "Pastel1") par(mfrow=c(3, 1)) histoplot(price ~ cut, data = diamonds, las = 1, col = palette) histoplot(price ~ clarity, data = diamonds, las = 2, col = palette) histoplot(price ~ color, data = diamonds, las = 2, col = palette) par(mfrow=c(3, 1)) #generate example data data_one <- rnorm(100) data_two <- rnorm(50, 1, 2) #generate histogram plot with similar 
functionality to histoplot histoplot(data_one, data_two, col="magenta") #note histoplot defaults to a greyscale plot histoplot(data_one, data_two) #colours can be customised separately, with axis labels, legends, and titles histoplot(data_one, data_two, col=c("red","blue"), names=c("data one", "data two"), main="data histogram", xlab="data class", ylab="data read") legend("topleft", fill=c("red","blue"), legend=c("data one", "data two")) #colours can be customised for the histogram fill and border separately histoplot(data_one, data_two, col="grey85", border="purple", names=c("data one", "data two"), main="data histogram", xlab="data class", ylab="data read") #colours can also be customised for the boxplot rectangle and lines (border and whiskers) histoplot(data_one, data_two, col="grey85", rectCol="lightblue", lineCol="blue", border="purple", names=c("data one", "data two"), main="data histogram", xlab="data class", ylab="data read") #these colours can also be customised separately for each histogram histoplot(data_one, data_two, col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"), lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"), main="data histogram", xlab="data class", ylab="data read") #this applies to any number of histograms, given that colours are provided for each histoplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), col=c("red", "orange", "green", "blue", "violet"), rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"), lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), names=c("data one", "data two", "data three", "data four", "data five"), main="data histogram", xlab="data class", ylab="data read") #The areaEqual parameter scales with width of histograms #histograms will have equal density area (including missing tails) rather than equal maximum width 
histoplot(data_one, data_two, areaEqual=TRUE) histoplot(data_one, data_two, areaEqual=TRUE, col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"), lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"), main="data histogram", xlab="data class", ylab="data read") histoplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), areaEqual=TRUE, col=c("red", "orange", "green", "blue", "violet"), rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"), lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"), names=c("data one", "data two", "data three", "data four", "data five"), main="data histogram", xlab="data class", ylab="data read") } \keyword{graphics} \keyword{histogram} \keyword{plot} vioplot/DESCRIPTION0000755000176200001440000000231114344672512013464 0ustar liggesusersPackage: vioplot Title: Violin Plot Version: 0.4.0 Date: 2022-12-08 Authors@R: c(person("Daniel", "Adler", email = "dadler@uni-goettingen.de", role = c("aut", "cph")), person("S. Thomas", "Kelly", email = "tomkellygenetics@gmail.com", role = c("aut", "cre")), person("Tom M.", "Elliott", email = "tom.elliott@auckland.ac.nz", role = c("aut", "ctb")), person("Jordan", "Adamson", email = "adamson@wifa.uni-leipzig.de", role = c("aut", "ctb"))) Description: A violin plot is a combination of a box plot and a kernel density plot. This package allows extensive customisation of violin plots. Depends: sm, zoo License: BSD_3_clause + file LICENSE URL: https://github.com/TomKellyGenetics/vioplot BugReports: https://github.com/TomKellyGenetics/vioplot/issues RoxygenNote: 7.1.2 Suggests: base, ggplot2, RColorBrewer, knitr, rmarkdown, testthat Language: en-GB VignetteBuilder: knitr Encoding: UTF-8 NeedsCompilation: no Packaged: 2022-12-09 01:26:08 UTC; tom Author: Daniel Adler [aut, cph], S. Thomas Kelly [aut, cre], Tom M. 
Elliott [aut, ctb], Jordan Adamson [aut, ctb] Maintainer: S. Thomas Kelly Repository: CRAN Date/Publication: 2022-12-09 17:50:02 UTC vioplot/build/0000755000176200001440000000000014344507257013060 5ustar liggesusersvioplot/build/vignette.rds0000644000176200001440000000072214344507257015420 0ustar liggesusersTAO0&*F"jԣ$&^f4DAx#*4e-nlOc]4C{}^=+xEX7`*èx% FJ%z!۝|*7l o" ˻+$d8Ɯ K 6Q!yGh7Y.0L؜]jNH=1kSԁg ̇&@;5` ]r[K}1\G$W`,${+_J>Nu3S3y}3vng|s 5sfX]Z3StfVY9մ2 S}M46Ι">YR2bOT%UK/LKW$ krKa&b4d+0 0lFX87pеvioplot/tests/0000755000176200001440000000000014027267745013126 5ustar liggesusersvioplot/tests/testthat/0000755000176200001440000000000014344672512014760 5ustar liggesusersvioplot/tests/testthat/test_ylog.R0000755000176200001440000002225713671617551017133 0ustar liggesuserslibrary("vioplot") context("log-scale") test_that("plot defaults", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) }) ##y-axis log scale (ylog) test_that("log-scale y-axis with ylog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T) }) test_that("linear y-axis with ylog FALSE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = F) }) ##y-axis log scale (log=TRUE) test_that("log-scale y-axis with log TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], 
iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T) }) test_that("linear y-axis with log FALSE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = F) }) test_that("override with ylog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = F, ylog=T) }) ##y-axis log scale (log="y") test_that("log-scale y-axis with log='y'", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'y') }) test_that("log-scale y-axis with log='xy'", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'xy') }) test_that("linear y-axis with log=''", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = '') }) test_that("linear y-axis with log='x'", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'x') }) test_that("override with ylog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), 
main="Sepal Length", log = 'x', ylog=T) }) ##y-axis removed test_that("linear scale y-axis with labels removed", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = F, yaxt="n") }) test_that("log-scale y-axis with labels removed", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, yaxt="n") }) library("vioplot") context("log-scale") test_that("horizontal: plot defaults", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica")) }) ##x-axis log scale (ylog) test_that("horizontal: log-scale x-axis with ylog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T) }) test_that("horizontal: log-scale x-axis with xlog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", xlog = T) }) test_that("horizontal: linear x-axis with ylog FALSE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, 
names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = F) }) test_that("horizontal: linear x-axis with xlog FALSE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", xlog = F) }) ##x-axis log scale (log=TRUE) test_that("horizontal: log-scale x-axis with log TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T) }) test_that("horizontal: linear x-axis with log FALSE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = F) }) test_that("horizontal: override with ylog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = F, ylog=T) }) ##x-axis log scale (log="y") test_that("horizontal: log-scale x-axis with log='y'", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'y') }) test_that("horizontal: log-scale x-axis with log='xy'", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'xy') }) test_that("horizontal: linear x-axis with log=''", { 
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = '') }) test_that("horizontal: linear x-axis with log='x'", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'x') }) test_that("horizontal: override with ylog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'x', ylog=T) }) test_that("horizontal: override with xlog TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = 'x', xlog=T) }) ##x-axis removed test_that("horizontal: linear scale x-axis with labels removed", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = F, yaxt="n") }) test_that("horizontal: linear scale x-axis with labels removed", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", xlog = F, yaxt="n") }) test_that("horizontal: log-scale x-axis with labels removed", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], 
iris$Sepal.Length[iris$Species=="virginica"], horizontal = TRUE, names=c("setosa", "versicolor", "virginica"), main="Sepal Length", xlog = T, yaxt="n") }) vioplot/tests/testthat/test_violin_median.R0000755000176200001440000000514613411056063020757 0ustar liggesuserslibrary("vioplot") context("side option") test_that("plot with left and right side options and median point", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="left") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="right") }) test_that("plot with both sides and median line", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="both", plotCentre = "line") }) test_that("plot with left and right side options and median line", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="left", plotCentre = "line") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="right", plotCentre = 
"line") }) test_that("plot with areaEqual options and median line", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="both", plotCentre = "line", areaEqual = T) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="left", plotCentre = "line", areaEqual = T) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="right", plotCentre = "line", areaEqual = T) }) vioplot/tests/testthat/test_violin_formula.R0000755000176200001440000000233613411056063021165 0ustar liggesuserslibrary("vioplot") context("formula input") test_that("plot data list input", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") }) test_that("plot vector formula input", { boxplot(iris$Sepal.Length~iris$Species) vioplot(iris$Sepal.Length~iris$Species) }) test_that("plot column formula with dataframe input", { boxplot(Sepal.Length~Species, data=iris) vioplot(Sepal.Length~Species, data=iris) }) test_that("plot formula with dataframe input and scalar colour", { vioplot(Sepal.Length~Species, data=iris, col="lightblue") }) test_that("plot formula with dataframe input and vector colour", { 
vioplot(Sepal.Length~Species, data=iris, col=c("lightgreen", "lightblue", "palevioletred")) }) # attach(iris) # iris <- as.matrix(iris[,1:4]) # test_that("plot column formula with matrix input", { # vioplot(Sepal.Length~Species, data=iris) # }) vioplot/tests/testthat/test_violin_customisation.R0000755000176200001440000000651513411056063022424 0ustar liggesuserslibrary("vioplot") context("color custom and vectorised") test_that("plot with defaults and scalar colours", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") }) #Vectorisation test_that("plot with vector colours", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) }) ## Plot colours: Violin Lines and Boxplot test_that("plot with scalar border", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], 
iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") }) test_that("plot with scalar boxplot modifications", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") }) test_that("plot with scalar median colour", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") }) ### Combined customisation test_that("plot with combined scalar customisation", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") }) ### Vectorisation test_that("plot with combined vector customisation", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) }) vioplot/tests/testthat/test_histoplot_formula.R0000755000176200001440000000235214344404314021712 0ustar liggesuserslibrary("vioplot") context("formula input") test_that("plot data list input", { data(iris) 
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") }) test_that("plot vector formula input", { boxplot(iris$Sepal.Length~iris$Species) histoplot(iris$Sepal.Length~iris$Species) }) test_that("plot column formula with dataframe input", { boxplot(Sepal.Length~Species, data=iris) histoplot(Sepal.Length~Species, data=iris) }) test_that("plot formula with dataframe input and scalar colour", { histoplot(Sepal.Length~Species, data=iris, col="lightblue") }) test_that("plot formula with dataframe input and vector colour", { histoplot(Sepal.Length~Species, data=iris, col=c("lightgreen", "lightblue", "palevioletred")) }) # attach(iris) # iris <- as.matrix(iris[,1:4]) # test_that("plot column formula with matrix input", { # histoplot(Sepal.Length~Species, data=iris) # }) vioplot/tests/testthat/test_violin_classes.R0000755000176200001440000000133414344404314021154 0ustar liggesuserslibrary("vioplot") context("different input classes") test_that("input as data.frame", { data(iris) boxplot(as.data.frame(iris)) vioplot(as.data.frame(iris)[sapply(as.list(iris), is.numeric)]) }) test_that("input as list", { data(iris) boxplot(as.list(iris)) vioplot(as.list(iris)[sapply(as.list(iris), is.numeric)]) }) test_that("input as matrix (by col)", { data(iris) boxplot(as.matrix(iris[sapply(as.list(iris), is.numeric)])) vioplot(as.matrix(iris[sapply(as.list(iris), is.numeric)])) }) test_that("input as matrix (by col)", { data(iris) boxplot(as.matrix(iris[sapply(as.list(iris), is.numeric)]), use.cols = FALSE) vioplot(as.matrix(iris[sapply(as.list(iris), is.numeric)]), use.cols = FALSE) }) 
vioplot/tests/testthat/test_violin_side.R0000755000176200001440000000146113411056063020442 0ustar liggesuserslibrary("vioplot") context("side option") test_that("plot with left and right side options", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="left") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta", side ="right") }) vioplot/tests/testthat/test_violin_names.R0000755000176200001440000000067013411056063020622 0ustar liggesuserslibrary("vioplot") context("names input") test_that("list input", { data(iris) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("a", "b", "c")) }) test_that("naming formulae", { data(iris) vioplot(iris$Sepal.Length~iris$Species, names=c("a", "b", "c")) vioplot(Sepal.Length~Species, data=iris, names=c("a", "b", "c")) }) vioplot/tests/testthat/test_violin_na_handle.R0000755000176200001440000000236313411056063021431 0ustar liggesuserslibrary("vioplot") context("NA handling for vector or formula input") test_that("plot data list input", { data(iris) iris[2,3]<-NA boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], 
iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") }) test_that("plot vector formula input", { boxplot(iris$Sepal.Length~iris$Species) vioplot(iris$Sepal.Length~iris$Species) }) test_that("plot column formula with dataframe input", { boxplot(Sepal.Length~Species, data=iris) vioplot(Sepal.Length~Species, data=iris) }) test_that("plot formula with dataframe input and scalar colour", { vioplot(Sepal.Length~Species, data=iris, col="lightblue") }) test_that("plot formula with dataframe input and vector colour", { vioplot(Sepal.Length~Species, data=iris, col=c("lightgreen", "lightblue", "palevioletred")) }) # iris <- as.matrix(iris) # test_that("plot column formula with matrix input", { # vioplot(Sepal.Length~Species, data=iris) # }) vioplot/tests/testthat/test_violin_unequal_groups.R0000755000176200001440000000117413411056063022570 0ustar liggesuserslibrary("vioplot") context("unequal group size") data(iris) table(iris$Species) identical(as.numeric(table(iris$Species)), c(50, 50, 50)) index <- sample(1:3,150,replace=T) while(identical(as.numeric(table(index)), c(50, 50, 50))) index <- sample(1:3,150,replace=T) table(index) iris$Species <- factor(names(table(iris$Species))[index]) test_that("list input", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"]) }) test_that("formulae input", { vioplot(iris$Sepal.Length~iris$Species) vioplot(Sepal.Length~Species, data=iris) }) vioplot/tests/testthat/test_histoplot_customisation.R0000755000176200001440000000653714344404314023157 0ustar liggesuserslibrary("vioplot") context("color custom and vectorised") test_that("plot with defaults and scalar colours", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", 
"versicolor", "virginica"), main = "Sepal Length") histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") }) #Vectorisation test_that("plot with vector colours", { histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) }) ## Plot colours: Violin Lines and Boxplot test_that("plot with scalar border", { histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") }) test_that("plot with scalar boxplot modifications", { histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") }) test_that("plot with scalar median colour", { histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], 
iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") }) ### Combined customisation test_that("plot with combined scalar customisation", { histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") }) ### Vectorisation test_that("plot with combined vector customisation", { histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) }) vioplot/tests/testthat/test_violin_area.R0000755000176200001440000000410513411056063020424 0ustar liggesuserslibrary("vioplot") context("controlling area") test_that("plot defaults", { data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) }) ##Violin Plot Area test_that("equal area with areaEqual TRUE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = 
TRUE) }) test_that("equal width with areaEqual FALSE", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = FALSE) }) test_that("equal area with areaEqual with vector colours", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = TRUE, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4")) }) test_that("equal area with areaEqual and wex scaling", { vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = TRUE, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25) }) vioplot/tests/testthat.R0000755000176200001440000000007214027267745015113 0ustar liggesuserslibrary(testthat) library(vioplot) test_check("vioplot") vioplot/vignettes/0000755000176200001440000000000014344507257013771 5ustar liggesusersvioplot/vignettes/histogram_formulae.Rmd0000755000176200001440000001201114344404314020310 0ustar liggesusers--- title: "Customising Histogram Plots with Formula Input" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{histoplot: Customising Histogram Plots with Formula Input} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Since boxplots have 
become the _de facto_ standard for plotting the distribution of data, most users are familiar with these and the formula input for dataframes. However, this input is not available in the standard `histoplot` package. Thus it has been restored here for enhanced backwards compatibility with `boxplot`. As shown below for the `iris` dataset, histogram plots show distribution information taking formula input that `boxplot` implements but `histoplot` is unable to. This applies the customisation shown in [the main histoplot vignette using histoplot syntax](histogram_customisation.html) with the formula method commonly used for `boxplot`, `t.test`, and `lm`. ```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` Performing the same call, however, does not work with `vioplot` (0.2). ```{r, message=FALSE, eval=FALSE} devtools::install_version("vioplot", version = "0.2") library("vioplot") vioplot(Sepal.Length~Species, data = iris) ``` ``` Error in min(data) : invalid 'type' (language) of argument ``` ## Plot Defaults ```{r, message=FALSE, eval=FALSE} vioplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta") ``` Another concern we see here is that the `vioplot` defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. 
Thus the plot default colours have been changed as shown here: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` ## Plot colours: Histogram Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each histogram separately, thus this is enabled with a vectorised `col` in `histoplot` (0.4): ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the histogram fill and border separately using the `col` and `border` arguments: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. 
```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet") ``` ### Combined customisation These customised colours can be combined: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each histogram: ```{r} histoplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` ## Split Bihistogram Plots We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` A direct comparison of 2 datasets can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` vioplot/vignettes/overlaying_annotations.Rmd0000755000176200001440000000550314344404314021225 
0ustar liggesusers--- title: "Overlaying base R graphics" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Overlaying base R graphics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Introduction: Integration with base R graphics Here we demonstrate how to combine violin plots with other base R graphics. In principle any base R graphics can be overlaid on top of a violin plot for annotation. Many problems can be resolved by overlaying base R graphics and integrating vioplot with other plotting functions. Any additional elements can be overlaid by running commands after generating the plot. The x-axes are integer values [1,2,3,…] for each violin. The y-axes are continuous values as displayed. The following plotting elements are supported, for example: points, lines, polygon. It is also possible to modify plotting parameters with: title, axis, legend. "vioplot()" functions similarly to "plot()" and passes input arguments from "par()". ### Plotting violins with highlighted medians For example it is possible to add additional annotations. ```{r} # generate dummy data a <- rnorm(25, 3, 0.5) b <- rnorm(25, 2, 1.0) c <- rnorm(25, 2.75, 0.25) d <- rnorm(25, 3.15, 0.375) e <- rnorm(25, 1, 0.25) datamat <- cbind(a, b, c, d, e) dim(datamat) ``` ```{r} library("vioplot") ``` ```{r} vioplot(datamat, ylim = c(0, 5)) # compute medians data.med <- apply(datamat, 2, median) data.med #overlay medians lines(data.med, lty = 2, lwd = 1.5) points(data.med, pch = 19, col = "red", cex = 2.25) ``` ### Custom axes and titles It is also possible to modify the axis labels and titles as shown in this example. Here default axes are suppressed and replaced with custom parameters. 
```{r} outcome <- c(rnorm(25, 3, 1), rnorm(25, 2, 0.5)) intervention <- c(rep("treatment", 25), rep("control", 25)) table(intervention) names(table(intervention)) unique(sort(intervention)) intervention <- as.factor(intervention) levels(intervention) d <- data.frame(outcome, intervention) vioplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n', main = "", xlab = "", ylab = "") axis(side = 1, at = 1:length(levels(intervention)), labels = levels(intervention)) mtext("custom x labels for intervention", side = 1) mtext("custom y labels for outcome", side = 2) title(main = "example with custom title", sub = "subtitles are supported") ``` #### Annotated histograms This is also supported by the histogram plot. ```{r} histoplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n', main = "", xlab = "", ylab = "") axis(side = 1, at = 1:length(levels(intervention)), labels = levels(intervention)) mtext("custom x labels for intervention", side = 1) mtext("custom y labels for outcome", side = 2) title(main = "example with custom title", sub = "subtitles are supported") ``` vioplot/vignettes/histogram_customisation.Rmd0000755000176200001440000001731014344475146021421 0ustar liggesusers--- title: "Customising Violin Plots with Histograms" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots with Histograms} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. 
Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, histogram plots show distribution information that the boxplot is unable to. ```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) library("vioplot") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ``` ## Plot Defaults However as we can see here the plot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. 
Thus the plot default colours have been changed as shown here: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` # Histogram plot Here we introduce a variant of the violin plot, using a mirrored bihistogram to show the distribution: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` ## Plot colours: Histogram Fill Plot colours can be further customised as with the original viooplot package using the `col` argument: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ``` ### Vectorisation The `vioplot` (0.2) function is unable to colour each histogram separately, thus this is enabled with a vectorised `col` in `viooplot` (0.3) and `histoplot` (0.4): ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) 
``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the histogram fill and border separately using the `col` and `border` arguments: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colors of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ``` ### Combined customisation These can be customised colours can be combined: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These color and shape settings can also be customised separately for each histogram: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", 
"palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` ## Split Bihistogram Plots We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` A direct comparision of 2 datasets can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` vioplot/vignettes/violin_split.Rmd0000755000176200001440000001641114027763472017157 0ustar liggesusers--- title: "Split Violin Plots" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette fig_width: 6 fig_height: 3 fig_align: 'center' fig_keep: 'last' vignette: > %\VignetteIndexEntry{vioplot: Split Violin Plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. 
Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. ###General Set up ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} library("vioplot") ``` We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` ###Boxplots First we plot Sepal Length on its own: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} boxplot(Sepal.Length~Species, data=iris, col="grey") ``` An indirect comparison can be achieved with par: ```{r, fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'} { par(mfrow=c(2,1)) boxplot(Sepal.Length~Species, data=iris_small, col = "lightblue") boxplot(Sepal.Length~Species, data=iris_large, col = "palevioletred") par(mfrow=c(1,1)) } ``` ### Violin Plots First we plot Sepal Length on its own: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris) ``` An indirect comparison can be achieved with par: ```{r, fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'} { par(mfrow=c(2,1)) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line") vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line") par(mfrow=c(1,1)) } ``` ### Split Violin Plots A more direct comparision can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} 
vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` #### Custom axes labels Custom axes labels are supported for split violin plots. However, you must use these arguments on the *first* call of `vioplot`. ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species))) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width") ``` Note that this is disabled for the second `vioplot` call to avoid overlaying labels. 
```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T, xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species))) legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width") ``` #### Median The line median option is more suitable for side by side comparisions but the point option is still available also: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` It may be necessary to include a `points` command to fix the median being overwritten by the following plots: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") title(xlab = 
"Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` Similarly points could be added where a line has been used previously: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_small[grep(species, iris_small$Species),]$Sepal.Length))), pch = 21, col = "lightblue4", bg = "lightblue2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` Here it is aesthetically pleasing and intuitive to interpret categorical differences in mean and variation in a continuous variable. 
#### Sources These extensions to `vioplot` here are based on those provided here: * https://gist.github.com/mbjoseph/5852613 These have previously been discussed on the following sites: * https://mbjoseph.github.io/posts/2018-12-23-split-violin-plots/ * http://tagteam.harvard.edu/hub_feeds/1981/feed_items/209875 * [https://www.r-bloggers.com/split-violin-plots/](https://www.r-bloggers.com/2013/06/split-violin-plots/) vioplot/vignettes/backup/0000755000176200001440000000000013507674421015234 5ustar liggesusersvioplot/vignettes/backup/violin_split.Rmd0000644000176200001440000001374313507652103020414 0ustar liggesusers--- title: "Split Violin Plots" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette fig_width: 6 fig_height: 3 fig_align: 'center' fig_keep: 'last' vignette: > %\VignetteIndexEntry{vioplot: Split Violin Plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexbility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. 
###General Set up ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} library("vioplot") ``` We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` ###Boxplots First we plot Sepal Length on its own: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} boxplot(Sepal.Length~Species, data=iris, col="grey") ``` An indirect comparison can be achieved with par: ```{r, fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'} { par(mfrow=c(2,1)) boxplot(Sepal.Length~Species, data=iris_small, col = "lightblue") boxplot(Sepal.Length~Species, data=iris_large, col = "palevioletred") par(mfrow=c(1,1)) } ``` ### Violin Plots First we plot Sepal Length on its own: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris) ``` An indirect comparison can be achieved with par: ```{r, fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'} { par(mfrow=c(2,1)) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line") vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line") par(mfrow=c(1,1)) } ``` ### Split Violin Plots A more direct comparison can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", 
"large"), title = "Sepal Width") ``` ### median The line median option is more suitable for side by side comparisons but the point option is still available also: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` It may be necessary to include a `points` command to fix the median being overwritten by the following plots: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` Similarly points could be added where a line has been used previously: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = 
"palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_small[grep(species, iris_small$Species),]$Sepal.Length))), pch = 21, col = "lightblue4", bg = "lightblue2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` Here it is aesthetically pleasing and intuitive to interpret categorical differences in mean and variation in a continuous variable. #### Sources These extensions to `vioplot` here are based on those provided here: * https://gist.github.com/mbjoseph/5852613 These have previously been discussed on the following sites: * https://mbjoseph.github.io/posts/2018-12-23-split-violin-plots/ * http://tagteam.harvard.edu/hub_feeds/1981/feed_items/209875 * https://www.r-bloggers.com/split-violin-plots/ vioplot/vignettes/backup/violin_ylog.Rmd0000644000176200001440000001121113507661566020234 0ustar liggesusers--- title: "Controlling y-axis Plotting" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Controlling y-axis Plotting} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. 
While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexbility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. ```{r} library("vioplot") ``` ```{r, message=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ##Violin y-axis ###Logarithmic scale However the existing violin plot packages (such as \code{\link[vioplot]{vioplot}}) do not support log-scale of the y-axis. This has been amended with the `ylog` argument. 
```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T) ``` This can also be invoked with the `log="y"` argument compatible with `boxplot`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "y") ``` Log axis can also be passed to horizontal plots: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "", horizontal = TRUE) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "y", horizontal = TRUE) ``` ###custom y-axes The y-axes can also be removed with `yaxt="n"` to enable customised y-axes: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, yaxt="n") ``` Thus custom axes can be added to violin plots. 
As shown on a linear scale: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") axis(2, at=1:10, labels=1:10) ``` As well as for on a log scale: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n", log="y", ylim=c(log(4), log(9))) axis(2, at=log(1:10), labels=1:10) ``` vioplot/vignettes/backup/violin_split.html0000755000176200001440000462432313320630501020636 0ustar liggesusers Split Violin Plots

Violin Plots

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

General Set up

We set up the data with two categories (Sepal Width) as follows:

data(iris)
summary(iris$Sepal.Width)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.800   3.000   3.057   3.300   4.400
table(iris$Sepal.Width > mean(iris$Sepal.Width))
## 
## FALSE  TRUE 
##    83    67
iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ]
iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ]

Boxplots

First we plot Sepal Length on its own:

boxplot(Sepal.Length~Species, data=iris, col="grey")

An indirect comparison can be achieved with par:

par(mfrow=c(2,1))
boxplot(Sepal.Length~Species, data=iris_small, col = "lightblue")
boxplot(Sepal.Length~Species, data=iris_large, col = "palevioletred")

par(mfrow=c(1,1))

Violin Plots

First we plot Sepal Length on its own:

vioplot(Sepal.Length~Species, data=iris)

An indirect comparison can be achieved with par:

par(mfrow=c(2,1))
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line")
vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line")

par(mfrow=c(1,1))

Split Violin Plots

A more direct comparison can be made with the side argument and add = TRUE on the second plot:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T)
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

median

The line median option is more suitable for side by side comparisons but the point option is still available also:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T)
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

It may be necessary to include a points command to fix the median being overwritten by the following plots:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T)
points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2")
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

Similarly points could be added where a line has been used previously:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T)
points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2")
points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_small[grep(species, iris_small$Species),]$Sepal.Length))), pch = 21, col = "lightblue4", bg = "lightblue2")
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

Here it is aesthetically pleasing and intuitive to interpret categorical differences in mean and variation in a continuous variable.

vioplot/vignettes/backup/violin_area.html0000755000176200001440000537227213320630501020417 0ustar liggesusers Controlling Violin Plot Area

While boxplots have become the de facto standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

Violin Plots

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

data(iris)
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

Violin Plot Area

However there are concerns that existing violin plot packages (such as vioplot) scale the data to the most aesthetically suitable width rather than maintaining proportions comparable across data sets. Consider the differing distributions shown below:

par(mfrow=c(3, 1))
par(mar=rep(2, 4))
plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green")
plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue")
plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4")

par(mfrow=c(1, 1))

Comparing datasets

Neither of the plots above shows the relative distributions on the same scale; even if we match the x-axis of a density plot, the relative heights are obscured and difficult to compare.

par(mfrow=c(3, 1))
par(mar=rep(2, 4))
xaxis <- c(3, 9)
yaxis <- c(0, 1.25)
plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green", xlim=xaxis, ylim=yaxis)
plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue", xlim=xaxis, ylim=yaxis)
plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4", xlim=xaxis, ylim=yaxis)

par(mfrow=c(1, 1))

This can somewhat be addressed by overlaying density plots:

par(mfrow=c(1, 1))
xaxis <- c(3, 9)
yaxis <- c(0, 1.25)
plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length", col="green", xlim=xaxis, ylim=yaxis)
lines(density(iris$Sepal.Length[iris$Species=="versicolor"]), col="blue")
lines(density(iris$Sepal.Length[iris$Species=="virginica"]), col="palevioletred4")
legend("topright", fill=c("green", "blue", "palevioletred4"), legend=levels(iris$Species), cex=0.5)

This has the benefit of highlighting the different distributions of the data subsets. However, notice here that a figure legend becomes necessary, plot axis limits need to be defined to display the range of all distribution curves, and the plot quickly becomes cluttered if the number of factors to be compared becomes much larger.

Area control in Violin plot

Therefore the areaEqual parameter has been added to customise the violin plot to serve a similar purpose:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = T)

If we compare this to the original vioplot functionality (defaulting to areaEqual = FALSE) the differences between the two are clear.

par(mfrow=c(2,1))
par(mar=rep(2, 4))
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = F)
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T)

par(mfrow=c(1,1))

Note that areaEqual is considering the full area of the density distribution before removing the outlier tails. We leave it up to the user's discretion which they elect to use. The areaEqual functionality is compatible with all of the customisation discussed in the main vioplot vignette.

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4"))

The violin width can further be scaled with wex, which maintains the proportions across the datasets if areaEqual = TRUE:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25)

Comparing distributions

Notice the utility of areaEqual for cases where different datasets have different underlying distributions:

vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5),  rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = F, main="Equal Width", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic"))

vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5),  rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = T, main="Equal Area", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic"))

vioplot/vignettes/backup/violin_area.Rmd0000644000176200001440000001714213507652102020165 0ustar liggesusers--- title: "Controlling Violin Plot Area" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Controlling Violin Plot Area} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. 
```{r} library("vioplot") ``` ```{r, message=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ##Violin Plot Area However there are concerns that existing violin plot packages (such as \code{\link[vioplot]{vioplot}}) scales the data to the most aesthetically suitable width rather than maintaining proportions comparable across data sets. Consider the differing distributions shown below: ```{r, echo=FALSE, message=FALSE} par(mar=rep(1,4)) ``` ```{r} par(mfrow=c(3, 1)) par(mar=rep(2, 4)) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green") plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue") plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4") par(mfrow=c(1, 1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` #Comparing datasets Neither of these plots above show the relative distributions on the same scale, even if we match the x-axis of a density plot the relative heights are obscured and difficult to compare. 
```{r, echo=FALSE, message=FALSE} par(mar=rep(2,4)) ``` ```{r} par(mfrow=c(3, 1)) par(mar=rep(2, 4)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4", xlim=xaxis, ylim=yaxis) par(mfrow=c(1, 1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` This can somewhat be addressed by overlaying density plots: ```{r} par(mfrow=c(1, 1)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length", col="green", xlim=xaxis, ylim=yaxis) lines(density(iris$Sepal.Length[iris$Species=="versicolor"]), col="blue") lines(density(iris$Sepal.Length[iris$Species=="virginica"]), col="palevioletred4") legend("topright", fill=c("green", "blue", "palevioletred4"), legend=levels(iris$Species), cex=0.5) ``` This has the benefit of highlighting the different distributions of the data subsets. However, notice here that a figure legend become necessary, plot axis limits need to be defined to display the range of all distribution curves, and the plot quickly becomes cluttered if the number of factors to be compared becomes much larger. ##Area control in Violin plot Therefore the `areaEqual` parameter has been added to customise the violin plot to serve a similar purpose: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = T) ``` If we compare this to the original vioplot functionality (defaulting to `areaEqual = FALSE`) the differences between the two are clear. 
```{r, echo=FALSE, message=FALSE} par(mar=rep(2, 4)) ``` ```{r} par(mfrow=c(2,1)) par(mar=rep(2, 4)) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = F) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T) par(mfrow=c(1,1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` Note that `areaEqual` is considering the full area of the density distribution before removing the outlier tails. We leave it up to the users discretion which they elect to use. The `areaEqual` functionality is compatible with all of the customisation used in discussed in [the main vioplot vignette](violin_customisation.html) ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4")) ``` The violin width can further be scaled with `wex`, which maintains the proportions across the datasets if `areaEqual = TRUE`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), 
border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25) ``` ## Comparing distributions Notice the utility of `areaEqual` for cases where different datasets have different underlying distributions: ```{r} vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = F, main="Equal Width", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = T, main="Equal Area", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) ``` vioplot/vignettes/backup/violin_formulae.html0000755000176200001440000547060213320630501021315 0ustar liggesusers Customising Violin Plots

Since boxplots have become the de facto standard for plotting the distribution of data most users are familiar with these and the formula input for dataframes. However this input is not available in the standard vioplot package. Thus it has been restored here for enhanced backwards compatibility with boxplot.

As shown below for the iris dataset, violin plots show distribution information taking formula input that boxplot implements but vioplot is unable to. This demonstrates the customisation demonstrated in the main vioplot vignette using vioplot syntax with the formula method commonly used for boxplot, t.test, and lm.

data(iris)
boxplot(Sepal.Length~Species, data = iris)

Whereas performing the same function does not work with vioplot.

library("vioplot")
vioplot(Sepal.Length~Species, data = iris)
Error in min(data) : invalid 'type' (language) of argument

Plot Defaults

vioplot(Sepal.Length~Species, data = iris)

Another concern we see here is that the vioplot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length")

Plot colours: Violin Fill

Plot colours can be further customised as with the original vioplot package using the col argument:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue")

Vectorisation

However the vioplot function is unable to colour each violin separately, thus this is enabled with a vectorised col in vioplot:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)

Plot colours: Violin Lines and Boxplot

Colours can also be customised for the violin fill and border separately using the col and border arguments:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue")

Similarly, the arguments lineCol and rectCol specify the colors of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred")

The same applies to the colour of the median point with colMed:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet")

Combined customisation

These customised colours can be combined:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet")

Vectorisation

These color and shape settings can also be customised separately for each violin:

vioplot(Sepal.Length~Species, data = iris, main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19))

vioplot/vignettes/backup/violin_ylog.html0000755000176200001440000350530213320630501020447 0ustar liggesusers Controlling y-axis Plotting

While boxplots have become the de facto standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

Violin Plots

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

data(iris)
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

Violin y-axis

Logarithmic scale

However the existing violin plot packages (such as vioplot) do not support log-scale of the y-axis. This has been amended with the ylog argument.

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, ylim=c(log(1), log(10)))

This can also be invoked with the log="y" argument compatible with boxplot:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T, ylim=c(log(1), log(10)))
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "y", ylim=c(log(1), log(10)))

custom y-axes

The y-axes can also be removed with yaxt="n" to enable customised y-axes:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n")

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, yaxt="n", ylim=c(log(1), log(10)))

Thus custom axes can be added to violin plots. As shown on a linear scale:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n")
axis(2, at=1:10, labels=1:10)

As well as on a log scale:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n", log="y", ylim=c(log(4), log(9)))
axis(2, at=log(1:10), labels=1:10)

vioplot/vignettes/backup/violin_customisation.html0000755000176200001440000547574413320630501022420 0ustar liggesusers Customising Violin Plots

While boxplots have become the de facto standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

data(iris)
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))
library("vioplot")
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

Plot Defaults

However as we can see here the plot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length")

Plot colours: Violin Fill

Plot colours can be further customised as with the original vioplot package using the col argument:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue")

Vectorisation

However the original vioplot (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised col in vioplot (0.3):

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)

Plot colours: Violin Lines and Boxplot

Colours can also be customised for the violin fill and border separately using the col and border arguments:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue")

Similarly, the arguments lineCol and rectCol specify the colors of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred")

The same applies to the colour of the median point with colMed:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet")

Combined customisation

These customised colours can be combined:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet")

Vectorisation

These color and shape settings can also be customised separately for each violin:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19))

This should be sufficient to customise the violin plot but further examples are given in the areaEqual vioplot vignette including how violin plots are useful for comparing variation when data does not follow the same distribution. This document also compares the violin plot with other established methods to plot data variation.

vioplot/vignettes/backup/violin_customisation.Rmd0000644000176200001440000001453413507651667022177 0ustar liggesusers--- title: "Customising Violin Plots" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexbility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. 
```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) library("vioplot") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ``` ## Plot Defaults However as we can see here the plot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. 
Thus the plot default colours have been changed as shown here: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` ## Plot colours: Violin Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3): ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. 
```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ``` ### Combined customisation These customised colours can be combined: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each violin: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` This should be sufficient to customise the violin plot but further examples are given in [the areaEqual vioplot vignette](violin_area.html) including how violin plots are useful for comparing variation when data does not follow the same distribution. This document also compares the violin plot with other established methods to plot data variation.
vioplot/vignettes/violin_ylog.Rmd0000755000176200001440000001031614027267745016776 0ustar liggesusers--- title: "Controlling y-axis Plotting" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Controlling y-axis Plotting} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. 
```{r} library("vioplot") ``` ```{r, message=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ## Violin y-axis ### Logarithmic scale However the existing violin plot packages (such as `vioplot`) do not support log-scale of the y-axis. This has been amended with the `ylog` argument. ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, ylim=c(log(1), log(10))) ``` This can also be invoked with the `log="y"` argument compatible with `boxplot`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T, ylim=c(log(1), log(10))) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "y", ylim=c(log(1), log(10))) ``` ### custom y-axes The y-axes can also be removed with `yaxt="n"` to enable customised y-axes: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor",
"virginica"), main="Sepal Length", ylog = T, yaxt="n", ylim=c(log(1), log(10))) ``` Thus custom axes can be added to violin plots. As shown on a linear scale: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") axis(2, at=1:10, labels=1:10) ``` As well as for on a log scale: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n", log="y", ylim=c(log(4), log(9))) axis(2, at=log(1:10), labels=1:10) ``` vioplot/vignettes/violin_area.Rmd0000755000176200001440000001714114027267745016737 0ustar liggesusers--- title: "Controlling Violin Plot Area" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Controlling Violin Plot Area} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. 
## Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. ```{r} library("vioplot") ``` ```{r, message=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ## Violin Plot Area However there are concerns that existing violin plot packages (such as `vioplot`) scale the data to the most aesthetically suitable width rather than maintaining proportions comparable across data sets. Consider the differing distributions shown below: ```{r, echo=FALSE, message=FALSE} par(mar=rep(1,4)) ``` ```{r} par(mfrow=c(3, 1)) par(mar=rep(2, 4)) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green") plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue") plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4") par(mfrow=c(1, 1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` # Comparing datasets None of the plots above show the relative distributions on the same scale; even if we match the x-axis of a density plot, the relative heights are obscured and difficult to compare.
```{r, echo=FALSE, message=FALSE} par(mar=rep(2,4)) ``` ```{r} par(mfrow=c(3, 1)) par(mar=rep(2, 4)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4", xlim=xaxis, ylim=yaxis) par(mfrow=c(1, 1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` This can somewhat be addressed by overlaying density plots: ```{r} par(mfrow=c(1, 1)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length", col="green", xlim=xaxis, ylim=yaxis) lines(density(iris$Sepal.Length[iris$Species=="versicolor"]), col="blue") lines(density(iris$Sepal.Length[iris$Species=="virginica"]), col="palevioletred4") legend("topright", fill=c("green", "blue", "palevioletred4"), legend=levels(iris$Species), cex=0.5) ``` This has the benefit of highlighting the different distributions of the data subsets. However, notice here that a figure legend becomes necessary, plot axis limits need to be defined to display the range of all distribution curves, and the plot quickly becomes cluttered if the number of factors to be compared becomes much larger. ## Area control in Violin plot Therefore the `areaEqual` parameter has been added to customise the violin plot to serve a similar purpose: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = T) ``` If we compare this to the original vioplot functionality (defaulting to `areaEqual = FALSE`) the differences between the two are clear.
```{r, echo=FALSE, message=FALSE} par(mar=rep(2, 4)) ``` ```{r} par(mfrow=c(2,1)) par(mar=rep(2, 4)) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = F) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T) par(mfrow=c(1,1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` Note that `areaEqual` considers the full area of the density distribution before removing the outlier tails. We leave it to the user's discretion which they elect to use. The `areaEqual` functionality is compatible with all of the customisation discussed in [the main vioplot vignette](violin_customisation.html): ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4")) ``` The violin width can further be scaled with `wex`, which maintains the proportions across the datasets if `areaEqual = TRUE`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"),
border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25) ``` ## Comparing distributions Notice the utility of `areaEqual` for cases where different datasets have different underlying distributions: ```{r} vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = F, main="Equal Width", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = T, main="Equal Area", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) ``` vioplot/vignettes/violin_formulae.Rmd0000755000176200001440000001011014027267745017626 0ustar liggesusers--- title: "Customising Violin Plots with Formula Input" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots with Formula Input} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Since boxplots have become the _de facto_ standard for plotting the distribution of data most users are familiar with these and the formula input for dataframes. However this input is not available in the standard `vioplot` package. Thus it has been restored here for enhanced backwards compatibility with `boxplot`. As shown below for the `iris` dataset, violin plots show distribution information taking formula input that `boxplot` implements but `vioplot` is unable to. This demonstrates the customisation demonstrated in [the main vioplot vignette using vioplot syntax](violin_customisation.html) with the formula method commonly used for `boxplot`, `t.test`, and `lm`. 
```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` Whereas performing the same function does not work with `vioplot` (0.2). ```{r, message=FALSE, eval=FALSE} devtools::install_version("vioplot", version = "0.2") library("vioplot") vioplot(Sepal.Length~Species, data = iris) ``` ``` Error in min(data) : invalid 'type' (language) of argument ``` ## Plot Defaults ```{r, message=FALSE, eval=FALSE} vioplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta") ``` Another concern we see here is that the `vioplot` defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` ## Plot colours: Violin Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3): ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the 
arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet") ``` ### Combined customisation These customised colours can be combined: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each violin: ```{r} vioplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` vioplot/vignettes/violin_customisation.Rmd0000755000176200001440000001453314027267745020732 0ustar liggesusers--- title: "Customising Violin Plots" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to.
While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. ```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) library("vioplot") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ``` ## Plot Defaults However as we can see here the plot defaults are not aesthetically pleasing, with a 
rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` ## Plot colours: Violin Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3): ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colors of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. 
```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ``` ### Combined customisation These customised colours can be combined: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each violin: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` This should be sufficient to customise the violin plot but further examples are given in [the areaEqual vioplot vignette](violin_area.html) including how violin plots are useful for comparing variation when data does not follow the same distribution. This document also compares the violin plot with other established methods to plot data variation.
vioplot/R/0000755000176200001440000000000014344404314012150 5ustar liggesusersvioplot/R/histoplot.R0000755000176200001440000007210714344404314014332 0ustar liggesusers#' histoplot #' #' Produce histogram plot(s) of the given (grouped) values with enhanced annotation and colour per group. Includes customisation of colours for each aspect of the histogram, boxplot, and separate histograms. This supports input of data as a list or formula, being backwards compatible with \code{\link[vioplot]{histoplot}} (0.2) and taking input in a formula as used for \code{\link[graphics]{boxplot}}. #' #' @name histoplot #' @aliases histoplot #' @param x for specifying data from which the boxplots are to be produced. Either a numeric vector, or a single list containing such vectors. Additional unnamed arguments specify further data as separate vectors (each corresponding to a component boxplot). NAs are allowed in the data. #' @param ... additional data vectors or formula parameters. For the formula method, named arguments to be passed to the default method. #' @param formula a formula, such as y ~ grp, where y is a numeric vector of data values to be split into groups according to the grouping variable grp (usually a factor). #' @param data a data.frame (or list) from which the variables in formula should be taken. #' @param use.cols logical indicating if columns (by default) or rows (use.cols = FALSE) should be plotted. #' @param subset an optional vector specifying a subset of observations to be used for plotting. #' @param drop,sep,lex.order defines groups to plot from formula, passed to \code{split.default}, see there. #' @param breaks the breaks for the density estimator, as explained in hist #' @param xlim,ylim numeric vectors of length 2, giving the x and y coordinates ranges. #' @param yaxt A character which specifies the y axis type. Specifying "n" suppresses plotting. #' @param ylog,xlog A logical value (see log in \code{\link[graphics]{plot.default}}). 
If ylog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "y")). For horizontal = TRUE then, if xlog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "x")). For a new device, it defaults to FALSE, i.e., linear scale. #' @param log Logarithmic scale if log = "y" or TRUE. Invokes ylog = TRUE. If horizontal is TRUE then invokes xlog = TRUE. #' @param logLab Increments for labelling y-axis on log-scale, defaults to numbers starting with 1, 2, 5, and 10. #' @param names one label, or a vector of labels for the data; must match the number of data vectors given #' @param col Graphical parameter for fill colour of the histogram(s) polygon. NA for no fill colour. If col is a vector, it specifies the colour per histogram, and colours are reused if necessary. #' @param border Graphical parameters for the colour of the histogram border passed to lines. NA for no border. If border is a vector, it specifies the colour per histogram, and colours are reused if necessary. #' @param lty,lwd Graphical parameters for the histogram passed to lines and polygon #' @param rectCol Graphical parameters to control fill colour of the box. NA for no fill colour. If rectCol is a vector, it specifies the colour per histogram, and colours are reused if necessary. #' @param lineCol Graphical parameters to control colour of the box outline and whiskers. NA for no border. If lineCol is a vector, it specifies the colour per histogram, and colours are reused if necessary. #' @param pchMed Graphical parameters to control shape of the median point. If pchMed is a vector, it specifies the shape per histogram. #' @param colMed,colMed2 Graphical parameters to control colour of the median point. If colMed is a vector, it specifies the colour per histogram. colMed specifies the fill colour in all cases unless pchMed is 21:25 in which case colMed is the border colour and colMed2 is the fill colour. #' @param drawRect logical. The box is drawn if TRUE. #' @param areaEqual logical.
Density plots checked for equal area if TRUE. wex must be scalar, relative widths of histograms depend on area. #' @param at position of each histogram. Default to 1:n #' @param add logical. if FALSE (default) a new plot is created #' @param wex relative expansion of the histogram. If wex is a vector, it specifies the area/width size per histogram and sizes are reused if necessary. #' @param horizontal logical. To use horizontal or vertical histograms. Note that log scale can only be used on the x-axis for horizontal histograms, and on the y-axis otherwise. #' @param main,sub,xlab,ylab graphical parameters passed to plot. #' @param cex A numerical value giving the amount by which plotting text should be magnified relative to the default. #' @param cex.axis The magnification to be used for y axis annotation relative to the current setting of cex. #' @param cex.names The magnification to be used for x axis annotation relative to the current setting of cex. Takes the value of cex.axis if not given. #' @param cex.lab The magnification to be used for x and y labels relative to the current setting of cex. #' @param cex.main The magnification to be used for main titles relative to the current setting of cex. #' @param cex.sub The magnification to be used for sub-titles relative to the current setting of cex. #' @param na.action a function which indicates what should happen when the data contain NAs. The default is to ignore missing values in either the response or the group. #' @param na.rm logical value indicating whether NA values should be stripped before the computation proceeds. Defaults to TRUE. #' @param side defaults to "both". Assigning "left" or "right" enables one sided plotting of histograms. May be applied as a scalar across all groups. 
#' @param axes,frame.plot,panel.first,panel.last,asp,line,outer,adj,ann,ask,bg,bty,cin,col.axis,col.lab,col.main,col.sub,cra,crt,csi,cxy,din,err,family,fg,fig,fin,font,font.axis,font.lab,font.main,font.sub,lab,las,lend,lheight,ljoin,lmitre,mai,mar,mex,mfcol,mfg,mfrow,mgp,mkh,new,oma,omd,omi,page,pch,pin,plt,ps,pty,smo,srt,tck,tcl,usr,xaxp,xaxs,xaxt,xpd,yaxp,yaxs,ylbias Arguments to be passed to methods, such as graphical parameters (see \code{\link[graphics]{par}})). #' @keywords plot graphics histogram #' @import sm #' @importFrom zoo rollmean #' @importFrom stats median na.omit quantile #' @importFrom graphics Axis axis box lines par plot.new plot.window plot.xy points polygon rect title #' @importFrom grDevices boxplot.stats dev.flush dev.hold dev.interactive devAskNewPage xy.coords #' @export #' @examples #' #' # box- vs histogram-plot #' par(mfrow=c(2,1)) #' mu<-2 #' si<-0.6 #' bimodal<-c(rnorm(1000,-mu,si),rnorm(1000,mu,si)) #' uniform<-runif(2000,-4,4) #' normal<-rnorm(2000,0,3) #' histoplot(bimodal,uniform,normal) #' boxplot(bimodal,uniform,normal) #' #' # add to an existing plot #' x <- rnorm(100) #' y <- rnorm(100) #' plot(x, y, xlim=c(-5,5), ylim=c(-5,5)) #' histoplot(x, col="tomato", horizontal=TRUE, at=-4, add=TRUE,lty=2, rectCol="gray") #' histoplot(y, col="cyan", horizontal=FALSE, at=-4, add=TRUE,lty=2) #' #' # formula input #' data("iris") #' histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", #' col=c("lightgreen", "lightblue", "palevioletred")) #' legend("topleft", legend=c("setosa", "versicolor", "virginica"), #' fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) #' #' data("diamonds", package = "ggplot2") #' palette <- RColorBrewer::brewer.pal(9, "Pastel1") #' par(mfrow=c(3, 1)) #' histoplot(price ~ cut, data = diamonds, las = 1, col = palette) #' histoplot(price ~ clarity, data = diamonds, las = 2, col = palette) #' histoplot(price ~ color, data = diamonds, las = 2, col = palette) #' par(mfrow=c(3, 1)) #' #' 
#' #generate example data
#' data_one <- rnorm(100)
#' data_two <- rnorm(50, 1, 2)
#'
#' #generate histogram plot with similar functionality to histoplot
#' histoplot(data_one, data_two, col="magenta")
#'
#' #note histoplot defaults to a greyscale plot
#' histoplot(data_one, data_two)
#'
#' #colours can be customised separately, with axis labels, legends, and titles
#' histoplot(data_one, data_two, col=c("red","blue"), names=c("data one", "data two"),
#'         main="data histogram", xlab="data class", ylab="data read")
#' legend("topleft", fill=c("red","blue"), legend=c("data one", "data two"))
#'
#' #colours can be customised for the histogram fill and border separately
#' histoplot(data_one, data_two, col="grey85", border="purple", names=c("data one", "data two"),
#'         main="data histogram", xlab="data class", ylab="data read")
#'
#' #colours can also be customised for the boxplot rectangle and lines (border and whiskers)
#' histoplot(data_one, data_two, col="grey85", rectCol="lightblue", lineCol="blue",
#'         border="purple", names=c("data one", "data two"),
#'         main="data histogram", xlab="data class", ylab="data read")
#'
#' #these colours can also be customised separately for each histogram
#' histoplot(data_one, data_two, col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"),
#'         lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"),
#'         main="data histogram", xlab="data class", ylab="data read")
#'
#' #this applies to any number of histograms, given that colours are provided for each
#' histoplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4),
#'         col=c("red", "orange", "green", "blue", "violet"),
#'         rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"),
#'         lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#'         border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#'         names=c("data one", "data two", "data three", "data four", "data five"),
#'
#'         main="data histogram", xlab="data class", ylab="data read")
#'
#' #The areaEqual parameter scales with width of histograms
#' #histograms will have equal density area (including missing tails) rather than equal maximum width
#' histoplot(data_one, data_two, areaEqual=TRUE)
#'
#' histoplot(data_one, data_two, areaEqual=TRUE,
#'         col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"),
#'         lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"),
#'         main="data histogram", xlab="data class", ylab="data read")
#'
#' histoplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4),
#'         areaEqual=TRUE, col=c("red", "orange", "green", "blue", "violet"),
#'         rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"),
#'         lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#'         border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#'         names=c("data one", "data two", "data three", "data four", "data five"),
#'         main="data histogram", xlab="data class", ylab="data read")
#' @export
#' @usage NULL
histoplot <- function(x, ...) {
  # S3 generic: dispatches on the class of `x` to the methods defined below.
  UseMethod("histoplot")
}

#' Draw a histogram plot for each Column (Row) of a Matrix
#'
#' Interpreting the columns (or rows) of a matrix as different groups, draw a histogram plot for each.
#'
#' @aliases histogram.matrix histoplot.matrix
#' @param x a numeric matrix.
#' @param use.cols logical indicating if columns (by default) or rows (use.cols = FALSE) should be plotted.
#' @param ... Further arguments to \code{\link[vioplot]{histoplot}}.
#' @rdname histoplot
#' @export
histoplot.matrix <- function(x, use.cols = TRUE, ...) {
  # Split the column-major cell vector into one group per column (or row).
  # seq_len() keeps zero-row / zero-column matrices from producing c(1, 0).
  groups <- if (use.cols) {
    split(c(x), rep.int(seq_len(ncol(x)), rep.int(nrow(x), ncol(x))))
  } else {
    # the row index is recycled down every column
    split(c(x), seq_len(nrow(x)))
  }
  # Carry over the dimnames of the plotted dimension as group labels.
  if (length(nam <- dimnames(x)[[1 + use.cols]])) names(groups) <- nam
  invisible(histoplot(groups, ...))
}

#' @rdname histoplot
#' @export
histoplot.list <- function(x, ...) {
  # Only numeric elements can be plotted: stop when there are none, warn and
  # drop the others when there are some.
  is_num <- vapply(x, is.numeric, logical(1))
  if (!any(is_num)) {
    stop("elements are not numeric: ", paste(names(x)[!is_num], collapse = ", "))
  }
  if (!all(is_num)) {
    warning("some elements are not numeric: ", paste(names(x)[!is_num], collapse = ", "))
    x <- x[is_num]
  }
  invisible(histoplot.default(x, ...))
}

#' @rdname histoplot
#' @export
histoplot.data.frame <- histoplot.list

#' @rdname histoplot
#' @export
histoplot.formula <- function(formula, data = NULL, ..., subset, na.action = NULL,
                              add = FALSE, ann = !add, horizontal = FALSE, side = "both",
                              xlab = mklab(y_var = horizontal), ylab = mklab(y_var = !horizontal),
                              names = NULL, drop = FALSE, sep = ".", lex.order = FALSE) {
  if (missing(formula) || (length(formula) != 3L)) {
    stop("'formula' missing or incorrect")
  }
  if (add && side != "both") {
    if (!is.null(names)) warning("Warning: names can only be changed on first call of histoplot (when add = FALSE) ")
    if (!missing(xlab)) warning("Warning: x-axis labels can only be changed on first call of histoplot (when add = FALSE) ")
    if (!missing(ylab)) warning("Warning: y-axis labels can only be changed on first call of histoplot (when add = FALSE) ")
  }
  # `mklab` backs the lazy defaults of `xlab`/`ylab`; those promises are only
  # forced after `mf` and `response` exist further down.
  if (missing(xlab) || missing(ylab)) {
    mklab <- function(y_var) {
      if (y_var) {
        names(mf)[response]
      } else {
        paste(names(mf)[-response], collapse = " : ")
      }
    }
  }
  # Rebuild the call as a stats::model.frame.default() call, stripping every
  # argument that belongs to the plotting step rather than the model frame.
  m <- match.call(expand.dots = FALSE)
  if (is.matrix(eval(m$data, parent.frame()))) {
    m$data <- as.data.frame(data)
  }
  m$... <- m$drop <- m$sep <- m$lex.order <- NULL
  m$xlab <- m$ylab <- m$add <- m$ann <- m$horizontal <- NULL
  m$names <- m$side <- NULL
  m$na.action <- na.action
  m[[1L]] <- quote(stats::model.frame.default)
  mf <- eval(m, parent.frame())
  response <- attr(attr(mf, "terms"), "response")
  # Axis labels are suppressed when adding to an existing plot.
  if (add) {
    xlab <- ylab <- NA
  }
  # One group per combination of the grouping variables.
  x <- split(mf[[response]], mf[-response], drop = drop, sep = sep, lex.order = lex.order)
  histoplot(x, xlab = xlab, ylab = ylab, names = names, add = add, ann = ann,
            horizontal = horizontal, side = side, ...)
}

#' @return Invisibly, a list with components \code{median}, \code{q1} and \code{q3}, each holding one value per plotted group.
#' @rdname histoplot
#' @export
histoplot.default <- function(x, ..., data = NULL, breaks = 'Sturges', xlim = NULL, ylim = NULL, names = NULL,
                              horizontal = FALSE, col = "grey50", border = par()$fg, lty = 1, lwd = 1,
                              rectCol = par()$fg, lineCol = par()$fg, pchMed = 19, colMed = "white",
                              colMed2 = "grey 75", at, add = FALSE, wex = 1, drawRect = TRUE, areaEqual = FALSE,
                              axes = TRUE, frame.plot = axes, panel.first = NULL, panel.last = NULL, asp = NA,
                              main = "", sub = "", xlab = NA, ylab = NA, line = NA, outer = FALSE,
                              xlog = NA, ylog = NA, adj = NA, ann = NA, ask = NA, bg = NA, bty = NA,
                              cex = NA, cex.axis = NA, cex.lab = NA, cex.main = NA, cex.names = NULL, cex.sub = NA,
                              cin = NA, col.axis = NA, col.lab = NA, col.main = NA, col.sub = NA,
                              cra = NA, crt = NA, csi = NA, cxy = NA, din = NA, err = NA, family = NA, fg = NA,
                              fig = NA, fin = NA, font = NA, font.axis = NA, font.lab = NA, font.main = NA,
                              font.sub = NA, lab = NA, las = NA, lend = NA, lheight = NA, ljoin = NA, lmitre = NA,
                              mai = NA, mar = NA, mex = NA, mfcol = NA, mfg = NA, mfrow = NA, mgp = NA, mkh = NA,
                              new = NA, oma = NA, omd = NA, omi = NA, page = NA, pch = NA, pin = NA, plt = NA,
                              ps = NA, pty = NA, smo = NA, srt = NA, tck = NA, tcl = NA, usr = NA,
                              xaxp = NA, xaxs = NA, xaxt = NA, xpd = NA, yaxp = NA, yaxs = NA, yaxt = NA,
                              ylbias = NA, log = "", logLab = c(1, 2, 5), na.action = NULL, na.rm = TRUE,
                              side = "both") {
  # ---- graphical parameters -------------------------------------------------
  # Every argument named after a par() entry and left at NA takes the device's
  # current value. par() is queried once; get()/assign() act on this frame.
  par_now <- par()
  for (par_name in names(par_now)) {
    if (is.na(get(par_name)[1])) assign(par_name, unlist(par_now[[par_name]]))
  }

  if (add && side != "both") {
    if (!is.null(names)) warning("Warning: names can only be changed on first call of histoplot (when add = FALSE) ")
    if (!is.na(xlab)) warning("Warning: x-axis labels can only be changed on first call of histoplot (when add = FALSE) ")
    if (!is.na(ylab)) warning("Warning: y-axis labels can only be changed on first call of histoplot (when add = FALSE) ")
    if (!missing(main)) warning("Warning: main title can only be changed on first call of histoplot (when add = FALSE) ")
    if (!missing(sub)) warning("Warning: subtitle can only be changed on first call of histoplot (when add = FALSE) ")
  }

  # ---- collect the groups ---------------------------------------------------
  if (!is.list(x)) {
    datas <- list(x, ...)
  } else {
    datas <- lapply(x, unlist)
    if (is.null(names)) {
      names <- names(datas)
    }
  }
  # A long (> 100) run of one repeated value is collapsed to that single value.
  # isTRUE() keeps an NA comparison from reaching `if`.
  datas <- lapply(datas, function(values) {
    if (isTRUE(all(values == unique(values)[1])) && length(values) > 100) {
      unique(values)[1]
    } else {
      values
    }
  })

  # ---- resolve log / xlog / ylog into one consistent state ------------------
  # Statement order matters: `log` is temporarily a logical flag and ends as
  # either "y" or "".
  if (is.character(log)) if ("y" %in% unlist(strsplit(log, ""))) log <- TRUE
  if (is.na(xlog) || (horizontal == TRUE && (log == FALSE || log == ""))) xlog <- FALSE
  log <- if (log == TRUE) "y" else ""
  if (log == "x" || log == "xy" || xlog == TRUE) {
    if (horizontal || log == "xy") {
      log <- TRUE
    } else {
      log <- FALSE
      ylog <- FALSE
    }
    xlog <- FALSE
  }
  if (log == TRUE || ylog == TRUE) {
    ylog <- TRUE
    log <- "y"
  } else {
    log <- ""
  }
  if (ylog) {
    # A log scale needs some positive data; missing values are ignored here.
    if (all(unlist(datas) <= 0, na.rm = TRUE)) {
      ylog <- FALSE
      warning("log scale cannot be used with non-positive data")
    }
  }

  # ---- missing and infinite values ------------------------------------------
  if (is.null(na.action)) na.action <- na.omit
  # Drop infinite observations. The result must be assigned back to `datas`,
  # otherwise the filter has no effect on the summaries below.
  datas <- lapply(datas, function(values) {
    if (is.numeric(values)) values[!is.infinite(values)] else values
  })
  if (na.rm) datas <- lapply(datas, na.action)
  n <- length(datas)
  if (missing(at)) {
    at <- seq_len(n)
  }

  # ---- per-group summaries --------------------------------------------------
  q1 <- numeric(n)
  q3 <- numeric(n)
  med <- numeric(n)
  base <- vector(mode = "list", length = n)    # histogram breaks (data axis)
  height <- vector(mode = "list", length = n)  # scaled bar half-widths
  area_check <- vector(mode = "list", length = n)
  baserange <- c(Inf, -Inf)
  # side = "left" / "right" switches off one half of each histogram
  radj <- ifelse(side == "right", 0, 1)
  ladj <- ifelse(side == "left", 0, 1)
  boxwex <- wex

  if (areaEqual) {
    for (i in seq_len(n)) {
      values <- unlist(datas[[i]])
      smout <- graphics::hist(values, plot = FALSE, breaks = breaks)
      Avg.pos <- mean(smout$mids)
      # NOTE(review): the source text of the next three statements was partly
      # lost in extraction; they are reconstructed from the surviving fragments
      # (`xt <- diff(smout$mids[smout$mids...`, the `area_check` list and the
      # `rollmean` import) -- confirm against the upstream package.
      below_avg <- smout$mids[smout$mids < Avg.pos]
      xt <- diff(below_avg)
      yt <- rollmean(below_avg, 2)
      area_check[[i]] <- sum(xt * yt)
    }
    if (length(wex) > 1) {
      warning("wex may not be a vector if areaEqual is TRUE")
      print("using first element of wex")
      # take the first element, as the message says (not the last loop index)
      wex <- wex[1L]
    }
    wex <- unlist(area_check) / max(unlist(area_check)) * wex
  }

  # One expansion factor per group; shorter vectors are reused as documented.
  wex_each <- rep_len(wex, n)
  boxwidth <- 0.05 * rep_len(boxwex, n)

  for (i in seq_len(n)) {
    values <- unlist(datas[[i]])
    q1[i] <- quantile(values, 0.25)
    q3[i] <- quantile(values, 0.75)
    med[i] <- median(values)
    smout <- graphics::hist(values, plot = FALSE, breaks = breaks)
    # the tallest bar spans 0.4 * wex on each side of `at`
    hscale <- 0.4 / max(smout$density) * wex_each[i]
    base[[i]] <- smout$breaks
    height[[i]] <- smout$density * hscale
    rng <- range(base[[i]])
    baserange[1] <- min(baserange[1], rng[1])
    baserange[2] <- max(baserange[2], rng[2])
  }

  # ---- plot limits and labels -----------------------------------------------
  if (!add) {
    xlim.default <- if (n == 1) {
      at + c(-0.5, 0.5)
    } else {
      range(at) + min(diff(at)) / 2 * c(-1, 1)
    }
    if (is.null(xlim)) {
      xlim <- xlim.default
    } else {
      print(paste0("Using c(", xlim[1], ",", xlim[2],
                   ") as input for xlim, note that default values for these dimensions are c(",
                   xlim.default[1], ",", xlim.default[2], ")"))
    }
    if (is.null(ylim)) {
      ylim <- baserange
    }
  }
  label <- if (is.null(names)) seq_len(n) else names

  # Shared plot.window() call; only the limits and log flags differ by layout.
  setup_window <- function(lim1, lim2, log_spec, xlog_flag, ylog_flag) {
    plot.window(lim1, lim2, log = log_spec, asp = asp, bty = bty, cex = cex,
                xaxs = xaxs, yaxs = yaxs, lab = lab, mai = mai, mar = mar, mex = mex,
                mfcol = mfcol, mfrow = mfrow, mfg = mfg, xlog = xlog_flag, ylog = ylog_flag)
  }
  # Reuse per-group styling vectors cyclically.
  pick <- function(v, i) v[1L + (i - 1L) %% length(v)]

  if (!add) {
    plot.new()
    # The coordinate system must exist before `panel.first` is evaluated.
    if (!horizontal) {
      setup_window(xlim, ylim, log, xlog, ylog)
    } else {
      setup_window(ylim, xlim, if (log == "y") "x" else "", ylog, xlog)
    }
  }
  panel.first

  # ---- axes -----------------------------------------------------------------
  # `yaxt` controls the data axis and `xaxt` the group axis in both layouts.
  # The stored axp values are not forwarded on a log scale.
  if (!horizontal) {
    if (!add) {
      setup_window(xlim, ylim, log, xlog, ylog)
      xaxp <- par()$xaxp
      yaxp <- par()$yaxp
      if (is.null(cex.names)) cex.names <- cex.axis
      if (yaxt != "n") {
        if (ylog) {
          Axis(unlist(datas), side = 2, cex.axis = cex.axis, col.axis = col.axis, font.axis = font.axis,
               mgp = mgp, tck = tck, tcl = tcl, las = las)
        } else {
          Axis(unlist(datas), side = 2, cex.axis = cex.axis, col.axis = col.axis, font.axis = font.axis,
               mgp = mgp, yaxp = yaxp, tck = tck, tcl = tcl, las = las)
        }
      }
      if (xaxt != "n") {
        if (ylog) {
          Axis(seq_along(datas), at = at, labels = label, side = 1, cex.axis = cex.names,
               col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las)
        } else {
          Axis(seq_along(datas), at = at, labels = label, side = 1, cex.axis = cex.names,
               col.axis = col.axis, font.axis = font.axis, mgp = mgp, xaxp = xaxp,
               tck = tck, tcl = tcl, las = las)
        }
      }
    }
  } else {
    # In the horizontal layout the data (and its log scale) move to the x axis.
    if (log == "y" || ylog == TRUE) {
      log <- "x"
      xlog <- TRUE
      ylog <- FALSE
    }
    if (!add) {
      setup_window(ylim, xlim, log, xlog, ylog)
      xaxp <- par()$xaxp
      yaxp <- par()$yaxp
      # resolve the default before its first use on the data axis
      if (is.null(cex.names)) cex.names <- cex.axis
      if (yaxt != "n") {
        if (xlog) {
          Axis(unlist(datas), side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis,
               mgp = mgp, tck = tck, tcl = tcl, las = las)
        } else {
          Axis(unlist(datas), side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis,
               mgp = mgp, xaxp = xaxp, tck = tck, tcl = tcl, las = las)
        }
      }
      # Group labels always go on side 2 here, including when yaxt = "n";
      # side 1 is the data axis in this layout.
      if (xaxt != "n") {
        if (xlog) {
          Axis(seq_along(datas), at = at, labels = label, side = 2, cex.axis = cex.axis,
               col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las)
        } else {
          Axis(seq_along(datas), at = at, labels = label, side = 2, cex.axis = cex.axis,
               col.axis = col.axis, font.axis = font.axis, mgp = mgp, yaxp = yaxp,
               tck = tck, tcl = tcl, las = las)
        }
      }
    }
  }

  if (frame.plot) {
    box(lty = lty, lwd = lwd)
  }

  # ---- histograms, boxes and medians ----------------------------------------
  for (i in seq_len(n)) {
    colp <- pick(col, i)
    borderp <- pick(border, i)
    breaks_i <- base[[i]]
    bar_from <- breaks_i[-length(breaks_i)]  # lower edge of every bin
    bar_to <- breaks_i[-1L]                  # upper edge of every bin
    pos_lo <- at[i] - radj * height[[i]]
    pos_hi <- at[i] + ladj * height[[i]]
    box_lo <- at[i] - radj * boxwidth[i] / 2
    box_hi <- at[i] + ladj * boxwidth[i] / 2
    if (!horizontal) {
      rect(pos_lo, bar_from, pos_hi, bar_to, col = colp, border = borderp, lty = lty)
      if (drawRect) {
        rect(box_lo, q1[i], box_hi, q3[i],
             col = pick(rectCol, i), border = pick(lineCol, i),
             xpd = xpd, lend = lend, ljoin = ljoin, lmitre = lmitre)
        points(at[i], med[i], pch = pick(pchMed, i), col = pick(colMed, i), bg = pick(colMed2, i),
               cex = cex, lwd = lwd, lty = lty)
      }
    } else {
      # data on x, group position on y: same rectangles, coordinates swapped
      rect(bar_from, pos_lo, bar_to, pos_hi, col = colp, border = borderp, lty = lty)
      if (drawRect) {
        rect(q1[i], box_lo, q3[i], box_hi,
             col = pick(rectCol, i), border = pick(lineCol, i),
             xpd = xpd, lend = lend, ljoin = ljoin, lmitre = lmitre)
        points(med[i], at[i], pch = pick(pchMed, i), col = pick(colMed, i), bg = pick(colMed2, i),
               cex = cex, lwd = lwd, lty = lty)
      }
    }
  }

  panel.last
  if (ann) {
    title(main = main, sub = sub, xlab = xlab, ylab = ylab, line = line, outer = outer,
          xpd = xpd, cex.main = cex.main, col.main = col.main, font.main = font.main)
  }
  invisible(list(median = med, q1 = q1, q3 = q3))
}
# Archive member header carried over from the package tarball (not R code), kept verbatim:
# vioplot/R/vioplot.stats.R0000644000176200001440000000216114344405146015130 0ustar liggesusers
#' Violin Plot Statistics
#'
#' This
#' function is typically called by another function to gather the statistics necessary
#' for producing box plots, but may be invoked separately. See: \code{\link[grDevices]{boxplot.stats}}
#'
#' @aliases violin.stats violinplot.stats
#' @rdname violin.stats
#' @param x a numeric vector for which the violin plot will be constructed
#'   (\code{NA}s and \code{NaN}s are allowed and omitted).
#' @param coef this determines how far the plot ‘whiskers’ extend out from the box. If coef is positive, the
#' whiskers extend to the most extreme data point which is no more than coef times the length of the box away
#' from the box. A value of zero causes the whiskers to extend to the data extremes (and no outliers be returned).
#' @param do.conf,do.out logicals; if FALSE, the conf or out component respectively will be empty in the result.
#' @param ... further arguments; accepted by the signature but not used when
#'   computing the statistics.
#' @return The value returned by \code{\link[grDevices]{boxplot.stats}} for \code{x}.
#' @importFrom grDevices boxplot.stats
#' @export
vioplot.stats <- function(x, coef = 1.5, do.conf = TRUE, do.out = TRUE, ...) {
  # Arguments arriving through `...` are not forwarded to boxplot.stats().
  box_summary <- boxplot.stats(
    x,
    coef = coef,
    do.conf = do.conf,
    do.out = do.out
  )
  box_summary
}
# Remainder of this physical line (archive member header and the start of the
# vioplot roxygen block, which continues on the next line) is kept verbatim.
vioplot/R/vioplot.R0000755000176200001440000007417414344404314014007 0ustar liggesusers#' Violin Plot #' #' Produce violin plot(s) of the given (grouped) values with enhanced annotation and colour per group. Includes customisation of colours for each aspect of the violin, boxplot, and separate violins. This supports input of data as a list or formula, being backwards compatible with \code{\link[vioplot]{vioplot}} (0.2) and taking input in a formula as used for \code{\link[graphics]{boxplot}}. #' #' @name vioplot #' @aliases violinplot #' @param x for specifying data from which the boxplots are to be produced. Either a numeric vector, or a single list containing such vectors. Additional unnamed arguments specify further data as separate vectors (each corresponding to a component boxplot). NAs are allowed in the data. #' @param ... additional data vectors or formula parameters.
For the formula method, named arguments to be passed to the default method. #' @param formula a formula, such as y ~ grp, where y is a numeric vector of data values to be split into groups according to the grouping variable grp (usually a factor). #' @param data a data.frame (or list) from which the variables in formula should be taken. #' @param use.cols logical indicating if columns (by default) or rows (use.cols = FALSE) should be plotted. #' @param subset an optional vector specifying a subset of observations to be used for plotting. #' @param drop,sep,lex.order defines groups to plot from formula, passed to \code{split.default}, see there. #' @param range a factor to calculate the upper/lower adjacent values #' @param h the height for the density estimator, if omit as explained in sm.density, h will be set to an optimum. A vector of length one, two or three, defining the smoothing parameter. A normal kernel function is used and h is its standard deviation. If this parameter is omitted, a normal optimal smoothing parameter is used. #' @param xlim,ylim numeric vectors of length 2, giving the x and y coordinates ranges. #' @param yaxt A character which specifies the y axis type. Specifying "n" suppresses plotting. #' @param ylog,xlog A logical value (see log in \code{\link[graphics]{plot.default}}). If ylog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "y")). For horizontal = TRUE then, if xlog is TRUE, a logarithmic scale is in use (e.g., after plot(*, log = "x")). For a new device, it defaults to FALSE, i.e., linear scale. #' @param log Logarithmic scale if log = "y" or TRUE. Invokes ylog = TRUE. If horizontal is TRUE then invokes xlog = TRUE. #' @param logLab Increments for labelling y-axis on log-scale, defaults to numbers starting with 1, 2, 5, and 10. #' @param names one label, or a vector of labels for the data must match the number of data given #' @param col Graphical parameter for fill colour of the violin(s) polygon. 
#' NA for no fill colour. If col is a vector, it specifies the colour per violin, and colours are reused if necessary.
#' @param border Graphical parameters for the colour of the violin border passed to lines. NA for no border. If border is a vector, it specifies the colour per violin, and colours are reused if necessary.
#' @param lty,lwd Graphical parameters for the violin passed to lines and polygon
#' @param rectCol Graphical parameters to control fill colour of the box. NA for no fill colour. If rectCol is a vector, it specifies the colour per violin, and colours are reused if necessary.
#' @param lineCol Graphical parameters to control colour of the box outline and whiskers. NA for no border. If lineCol is a vector, it specifies the colour per violin, and colours are reused if necessary.
#' @param pchMed Graphical parameters to control shape of the median point. If pchMed is a vector, it specifies the shape per violin.
#' @param colMed,colMed2 Graphical parameters to control colour of the median point. If colMed is a vector, it specifies the colour per violin. colMed specifies the fill colour in all cases unless pchMed is 21:25 in which case colMed is the border colour and colMed2 is the fill colour.
#' @param drawRect logical. The box is drawn if TRUE.
#' @param areaEqual logical. Density plots checked for equal area if TRUE. wex must be scalar, relative widths of violins depend on area.
#' @param at position of each violin. Defaults to 1:n.
#' @param add logical. If FALSE (default) a new plot is created.
#' @param wex relative expansion of the violin. If wex is a vector, it specifies the area/width size per violin and sizes are reused if necessary.
#' @param horizontal logical. To use horizontal or vertical violins. Note that log scale can only be used on the x-axis for horizontal violins, and on the y-axis otherwise.
#' @param main,sub,xlab,ylab graphical parameters passed to plot.
#' @param cex A numerical value giving the amount by which plotting text should be magnified relative to the default.
#' @param cex.axis The magnification to be used for y axis annotation relative to the current setting of cex.
#' @param cex.names The magnification to be used for x axis annotation relative to the current setting of cex. Takes the value of cex.axis if not given.
#' @param cex.lab The magnification to be used for x and y labels relative to the current setting of cex.
#' @param cex.main The magnification to be used for main titles relative to the current setting of cex.
#' @param cex.sub The magnification to be used for sub-titles relative to the current setting of cex.
#' @param na.action a function which indicates what should happen when the data contain NAs. The default is to ignore missing values in either the response or the group.
#' @param na.rm logical value indicating whether NA values should be stripped before the computation proceeds. Defaults to TRUE.
#' @param side defaults to "both". Assigning "left" or "right" enables one sided plotting of violins. May be applied as a scalar across all groups.
#' @param plotCentre defaults to "point", plotting a central point at the median. If "line" is given, a median line is plotted instead (subject to side).
#' @param axes,frame.plot,panel.first,panel.last,asp,line,outer,adj,ann,ask,bg,bty,cin,col.axis,col.lab,col.main,col.sub,cra,crt,csi,cxy,din,err,family,fg,fig,fin,font,font.axis,font.lab,font.main,font.sub,lab,las,lend,lheight,ljoin,lmitre,mai,mar,mex,mfcol,mfg,mfrow,mgp,mkh,new,oma,omd,omi,page,pch,pin,plt,ps,pty,smo,srt,tck,tcl,usr,xaxp,xaxs,xaxt,xpd,yaxp,yaxs,ylbias Arguments to be passed to methods, such as graphical parameters (see \code{\link[graphics]{par}}).
#' @keywords plot graphics violin
#' @import sm
#' @importFrom zoo rollmean
#' @importFrom stats median na.omit quantile
#' @importFrom graphics Axis axis box lines par plot.new plot.window plot.xy points polygon rect title
#' @importFrom grDevices boxplot.stats dev.flush dev.hold dev.interactive devAskNewPage xy.coords
#' @export
#' @examples
#'
#' # box- vs violin-plot
#' par(mfrow=c(2,1))
#' mu<-2
#' si<-0.6
#' bimodal<-c(rnorm(1000,-mu,si),rnorm(1000,mu,si))
#' uniform<-runif(2000,-4,4)
#' normal<-rnorm(2000,0,3)
#' vioplot(bimodal,uniform,normal)
#' boxplot(bimodal,uniform,normal)
#'
#' # add to an existing plot
#' x <- rnorm(100)
#' y <- rnorm(100)
#' plot(x, y, xlim=c(-5,5), ylim=c(-5,5))
#' vioplot(x, col="tomato", horizontal=TRUE, at=-4, add=TRUE,lty=2, rectCol="gray")
#' vioplot(y, col="cyan", horizontal=FALSE, at=-4, add=TRUE,lty=2)
#'
#' # formula input
#' data("iris")
#' vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length",
#' col=c("lightgreen", "lightblue", "palevioletred"))
#' legend("topleft", legend=c("setosa", "versicolor", "virginica"),
#' fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)
#'
#' data("diamonds", package = "ggplot2")
#' palette <- RColorBrewer::brewer.pal(9, "Pastel1")
#' par(mfrow=c(3, 1))
#' vioplot(price ~ cut, data = diamonds, las = 1, col = palette)
#' vioplot(price ~ clarity, data = diamonds, las = 2, col = palette)
#' vioplot(price ~ color, data = diamonds, las = 2, col = palette)
#' par(mfrow=c(3, 1))
#'
#' #generate example data
#' data_one <- rnorm(100)
#' data_two <- rnorm(50, 1, 2)
#'
#' #generate violin plot with similar functionality to vioplot
#' vioplot(data_one, data_two, col="magenta")
#'
#' #note vioplot defaults to a greyscale plot
#' vioplot(data_one, data_two)
#'
#' #colours can be customised separately, with axis labels, legends, and titles
#' vioplot(data_one, data_two, col=c("red","blue"), names=c("data one", "data two"),
#' main="data violin", xlab="data class", ylab="data read")
#' legend("topleft", fill=c("red","blue"), legend=c("data one", "data two"))
#'
#' #colours can be customised for the violin fill and border separately
#' vioplot(data_one, data_two, col="grey85", border="purple", names=c("data one", "data two"),
#' main="data violin", xlab="data class", ylab="data read")
#'
#' #colours can also be customised for the boxplot rectangle and lines (border and whiskers)
#' vioplot(data_one, data_two, col="grey85", rectCol="lightblue", lineCol="blue",
#' border="purple", names=c("data one", "data two"),
#' main="data violin", xlab="data class", ylab="data read")
#'
#' #these colours can also be customised separately for each violin
#' vioplot(data_one, data_two, col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"),
#' lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"),
#' main="data violin", xlab="data class", ylab="data read")
#'
#' #this applies to any number of violins, given that colours are provided for each
#' vioplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4),
#' col=c("red", "orange", "green", "blue", "violet"),
#' rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"),
#' lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#' border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#' names=c("data one", "data two", "data three", "data four", "data five"),
#' main="data violin", xlab="data class", ylab="data read")
#'
#' #The areaEqual parameter scales the width of the violins
#' #Violins will have equal density area (including missing tails) rather than equal maximum width
#' vioplot(data_one, data_two, areaEqual=TRUE)
#'
#' vioplot(data_one, data_two, areaEqual=TRUE,
#' col=c("skyblue", "plum"), rectCol=c("lightblue", "palevioletred"),
#' lineCol="blue", border=c("royalblue", "purple"), names=c("data one", "data two"),
#' main="data violin", xlab="data class", ylab="data read")
#'
#'
#' vioplot(data_one, data_two, rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4),
#' areaEqual=TRUE, col=c("red", "orange", "green", "blue", "violet"),
#' rectCol=c("palevioletred", "peachpuff", "lightgreen", "lightblue", "plum"),
#' lineCol=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#' border=c("red4", "orangered", "forestgreen", "royalblue", "mediumorchid"),
#' names=c("data one", "data two", "data three", "data four", "data five"),
#' main="data violin", xlab="data class", ylab="data read")
#' @export
#' @usage NULL
vioplot <- function(x, ...) {
  # S3 generic: the method is chosen from the class of `x`.
  UseMethod("vioplot")
}

#' Draw a Violin plot for each Column (Row) of a Matrix
#'
#' Interpreting the columns (or rows) of a matrix as different groups, draw a
#' violin plot for each.
#'
#' @aliases violin.matrix violinplot.matrix
#' @param x a numeric matrix.
#' @param use.cols logical indicating if columns (by default) or rows (use.cols = FALSE) should be plotted.
#' @param ... Further arguments to \code{\link[vioplot]{vioplot}}.
#' @rdname vioplot
#' @export
vioplot.matrix <- function(x, use.cols = TRUE, ...) {
  # Split the matrix cells into one vector per column (or per row).
  # seq_len() keeps the grouping index empty for a zero-extent matrix instead
  # of producing the descending sequence c(1, 0).
  groups <- if (use.cols) {
    split(c(x), rep.int(seq_len(ncol(x)), rep.int(nrow(x), ncol(x))))
  } else {
    split(c(x), seq_len(nrow(x)))
  }
  # Use the matching dimnames as group labels when the matrix has them.
  nam <- dimnames(x)[[1 + use.cols]]
  if (length(nam) > 0) {
    names(groups) <- nam
  }
  invisible(vioplot(groups, ...))
}

#' @rdname vioplot
#' @export
vioplot.list <- function(x, ...) {
  # Keeps the numeric elements of `x` and hands them to vioplot.default().
  # Errors when `x` is empty or has no numeric element; warns (and drops the
  # offenders) when only some elements are non-numeric.
  if (length(x) == 0L) {
    stop("'x' contains no elements to plot")
  }
  # vapply() always yields a logical vector, whatever the input looks like.
  is_num <- vapply(x, is.numeric, logical(1))
  if (any(!is_num)) {
    # Label unnamed elements by position so the message is never empty.
    labels <- names(x)
    if (is.null(labels)) {
      labels <- character(length(x))
    }
    unnamed <- is.na(labels) | !nzchar(labels)
    labels[unnamed] <- paste0("[[", which(unnamed), "]]")
    dropped <- paste(labels[!is_num], collapse = ", ")
    if (!any(is_num)) {
      stop("elements are not numeric: ", dropped)
    }
    warning("some elements are not numeric: ", dropped)
    x <- x[is_num]
  }
  invisible(vioplot.default(x, ...))
}

#' @rdname vioplot
#' @export
vioplot.data.frame <- vioplot.list

#' @rdname vioplot
#' @export
vioplot.formula <- function(formula, data = NULL, ..., subset, na.action = NULL,
                            add = FALSE, ann = !add, horizontal = FALSE,
                            side = "both",
                            xlab = mklab(y_var = horizontal),
                            ylab = mklab(y_var = !horizontal),
                            names = NULL, drop = FALSE, sep = ".",
                            lex.order = FALSE) {
  if (missing(formula) || (length(formula) != 3L)) {
    stop("'formula' missing or incorrect")
  }
  if (add && side != "both") {
    if (!is.null(names)) warning("Warning: names can only be changed on first call of vioplot (when add = FALSE) ")
    if (!missing(xlab)) warning("Warning: x-axis labels can only be changed on first call of vioplot (when add = FALSE) ")
    if (!missing(ylab)) warning("Warning: y-axis labels can only be changed on first call of vioplot (when add = FALSE) ")
  }
  # The default values of `xlab` and `ylab` call mklab(); they are evaluated
  # lazily, and mklab() reads `mf` and `response`, which are assigned below.
  if (missing(xlab) || missing(ylab)) {
    mklab <- function(y_var) {
      if (y_var) {
        names(mf)[response]
      } else {
        paste(names(mf)[-response], collapse = " : ")
      }
    }
  }
  # Turn the matched call into a call to stats::model.frame.default by
  # removing every argument that is handled here or passed on to vioplot().
  m <- match.call(expand.dots = FALSE)
  if (is.matrix(eval(m$data, parent.frame()))) {
    m$data <- as.data.frame(data)
  }
  m$... <- m$drop <- m$sep <- m$lex.order <- NULL
  m$xlab <- m$ylab <- m$add <- m$ann <- m$horizontal <- NULL
  m$names <- m$side <- NULL
  m$na.action <- na.action
  m[[1L]] <- quote(stats::model.frame.default)
  mf <- eval(m, parent.frame())
  response <- attr(attr(mf, "terms"), "response")
  # When adding to an existing plot both axis labels are set to NA.
  if (add) {
    xlab <- ylab <- NA
  }
  vioplot(split(mf[[response]], mf[-response],
                drop = drop, sep = sep, lex.order = lex.order),
          xlab = xlab, ylab = ylab, names = names,
          add = add, ann = ann, horizontal = horizontal, side = side, ...)
}

# Remainder of this physical line (the flattened head of vioplot.default, whose
# body continues on the following lines) is kept verbatim.
#' @rdname vioplot #' @export vioplot.default <- function (x, ..., data = NULL, range = 1.5, h = NULL, xlim = NULL, ylim = NULL, names = NULL, horizontal = FALSE, col = "grey50", border = par()$fg, lty = 1, lwd = 1, rectCol = par()$fg, lineCol = par()$fg, pchMed = 19, colMed = "white", colMed2 = "grey 75", at, add = FALSE, wex = 1, drawRect = TRUE, areaEqual=FALSE, axes = TRUE, frame.plot = axes, panel.first = NULL, panel.last = NULL, asp = NA, main="", sub="", xlab=NA, ylab=NA, line = NA, outer = FALSE, xlog = NA, ylog=NA, adj=NA, ann = NA, ask=NA, bg=NA, bty=NA, cex=NA, cex.axis=NA, cex.lab=NA, cex.main=NA, cex.names=NULL, cex.sub=NA, cin=NA, col.axis=NA, col.lab=NA, col.main=NA, col.sub=NA, cra=NA, crt=NA, csi=NA,cxy=NA, din=NA, err=NA, family=NA, fg=NA, fig=NA, fin=NA, font=NA, font.axis=NA, font.lab=NA, font.main=NA, font.sub=NA, lab=NA, las=NA, lend=NA, lheight=NA, ljoin=NA, lmitre=NA, mai=NA, mar=NA, mex=NA, mfcol=NA, mfg=NA, mfrow=NA, mgp=NA, mkh=NA, new=NA, oma=NA, omd=NA, omi=NA, page=NA, pch=NA, pin=NA, plt=NA, ps=NA, pty=NA, smo=NA, srt=NA, tck=NA, tcl=NA, usr=NA, xaxp=NA, xaxs=NA, xaxt=NA, xpd=NA, yaxp=NA, yaxs=NA, yaxt=NA, ylbias=NA, log="", logLab=c(1,2,5), na.action = NULL, na.rm = T, side = "both", plotCentre = "point") { #assign graphical parameters if not given for(ii in 1:length(names(par()))){ if(is.na(get(names(par())[ii])[1])) assign(names(par()[ii]), unlist(par()[[ii]])) } if(add && side != "both"){ if(!is.null(names)) warning("Warning: names 
can only be changed on first call of vioplot (when add = FALSE) ") if(!is.na(xlab)) warning("Warning: x-axis labels can only be changed on first call of vioplot (when add = FALSE) ") if(!is.na(ylab)) warning("vy-axis labels can only be changed on first call of vioplot (when add = FALSE) ") if(!missing(main)) warning("Warning: main title can only be changed on first call of vioplot (when add = FALSE) ") if(!missing(sub)) warning("Warning: subtitle can only be changed on first call of vioplot (when add = FALSE) ") } if(!is.list(x)){ datas <- list(x, ...) } else{ datas <- lapply(x, unlist) if(is.null(names)){ names <- names(datas) } } datas <- lapply(datas, function(x){ if((all(x == na.omit(unique(x))[1] | is.na(x))) & length(x) > 100){ na.omit(unique(x))[1] } else { x } }) if(is.character(log)) if("y" %in% unlist(strsplit(log, ""))) log <- TRUE if(is.na(xlog) | (horizontal == TRUE & (log == FALSE | log == ""))) xlog <- FALSE log <- ifelse(log == TRUE, "y", "") if(log == 'x' | log == 'xy' | xlog == TRUE){ if(horizontal | log == "xy"){ log <- TRUE } else { log <- FALSE ylog <- FALSE } xlog <- FALSE } if(log == TRUE | ylog == TRUE){ ylog <- TRUE log <- "y" } else { log <- "" } if(ylog){ #check data is compatible with log scale if(all(unlist(datas) <= 0)){ ylog <- FALSE warning("log scale cannot be used with non-positive data") } else { #log-scale data datas <- datas #lapply(datas, function(x) log(unlist(x))) } } if(is.null(na.action)) na.action <- na.omit lapply(datas, function(data) data <- data[!sapply(data, is.infinite)]) if(na.rm) datas <- lapply(datas, na.action) n <- length(datas) #if(is.list(datas)) datas <- as.data.frame(datas) if (missing(at)) at <- 1:n upper <- vector(mode = "numeric", length = n) lower <- vector(mode = "numeric", length = n) q1 <- vector(mode = "numeric", length = n) q2 <- vector(mode = "numeric", length = n) q3 <- vector(mode = "numeric", length = n) med <- vector(mode = "numeric", length = n) base <- vector(mode = "list", length = n) height 
<- vector(mode = "list", length = n) area_check <- vector(mode = "list", length = n) baserange <- c(Inf, -Inf) args <- list(display = "none") radj <- ifelse(side == "right", 0, 1) ladj <- ifelse(side == "left", 0, 1) boxwex <- wex if (!(is.null(h))) args <- c(args, h = h) if(plotCentre == "line") med.dens <- rep(NA, n) if(areaEqual){ for (i in 1:n) { data <- unlist(datas[[i]]) data.min <- min(data, na.rm = na.rm) data.max <- max(data, na.rm = na.rm) q1[i] <- quantile(data, 0.25) q2[i] <- quantile(data, 0.5) q3[i] <- quantile(data, 0.75) med[i] <- median(data) iqd <- q3[i] - q1[i] upper[i] <- min(q3[i] + range * iqd, data.max) lower[i] <- max(q1[i] - range * iqd, data.min) est.xlim <- c(min(lower[i], data.min), max(upper[i], data.max)) smout <- do.call("sm.density", c(list(data, xlim = est.xlim), args)) if(plotCentre == "line"){ med.dat <- do.call("sm.density", c(list(data, xlim=est.xlim, eval.points=med[i], display = "none"))) med.dens[i] <- med.dat$estimate } Avg.pos <- mean(smout$eval.points) xt <- diff(smout$eval.points[smout$eval.points1){ warning("wex may not be a vector if areaEqual is TRUE") print("using first element of wex") wex<-wex[i] } wex <-unlist(area_check)/max(unlist(area_check))*wex } for (i in 1:n) { data <- unlist(datas[[i]]) data.min <- min(data, na.rm = na.rm) data.max <- max(data, na.rm = na.rm) q1[i] <- quantile(data, 0.25) q2[i] <- quantile(data, 0.5) q3[i] <- quantile(data, 0.75) med[i] <- median(data) iqd <- q3[i] - q1[i] upper[i] <- min(q3[i] + range * iqd, data.max) lower[i] <- max(q1[i] - range * iqd, data.min) est.xlim <- c(min(lower[i], data.min), max(upper[i], data.max)) smout <- do.call("sm.density", c(list(data, xlim = est.xlim), args)) hscale <- 0.4/max(smout$estimate) * ifelse(length(wex)>1, wex[i], wex) base[[i]] <- smout$eval.points height[[i]] <- smout$estimate * hscale t <- range(base[[i]]) baserange[1] <- min(baserange[1], t[1]) baserange[2] <- max(baserange[2], t[2]) if(plotCentre == "line"){ med.dat <- 
do.call("sm.density", c(list(data, xlim=est.xlim, eval.points=med[i], display = "none"))) med.dens[i] <- med.dat$estimate *hscale } } if (!add) { if (is.null(xlim)) { xlim <- if (n == 1){ at + c(-0.5, 0.5) } else { range(at) + min(diff(at))/2 * c(-1, 1) } } else { xlim.default <- if (n == 1){ at + c(-0.5, 0.5) } else { range(at) + min(diff(at))/2 * c(-1, 1) } print(paste0("Using c(", xlim[1],",", xlim[2], ") as input for xlim, note that default values for these dimensions are c(", xlim.default[1],",", xlim.default[2], ")")) } if (is.null(ylim)) { ylim <- baserange } } if (is.null(names)) { label <- 1:n } else { label <- names } boxwidth <- 0.05 * ifelse(length(boxwex)>1, boxwex[i], boxwex) if (!add){ plot.new() if(!horizontal){ plot.window(xlim, ylim, log = log, asp = asp, bty = bty, cex = cex, xaxs = xaxs, yaxs = yaxs, lab = lab, mai = mai, mar = mar, mex = mex, mfcol = mfcol, mfrow = mfrow, mfg = mfg, xlog = xlog, ylog = ylog) } else { plot.window(ylim, xlim, log = ifelse(log == "y", "x", ""), asp = asp, bty = bty, cex = cex, xaxs = xaxs, yaxs = yaxs, lab = lab, mai = mai, mar = mar, mex = mex, mfcol = mfcol, mfrow = mfrow, mfg = mfg, xlog = ylog, ylog = xlog) } } panel.first if (!horizontal) { if (!add) { plot.window(xlim, ylim, log = log, asp = asp, bty = bty, cex = cex, xaxs = xaxs, yaxs = yaxs, lab = lab, mai = mai, mar = mar, mex = mex, mfcol = mfcol, mfrow = mfrow, mfg = mfg, xlog = xlog, ylog = ylog) xaxp <- par()$xaxp yaxp <- par()$yaxp if(yaxt !="n"){ if(ylog){ #log_axis_label <- log_axis_label[log_axis >= exp(par("usr")[3])] #log_axis <- log_axis[log_axis >= exp(par("usr")[3])] #log_axis_label <- log_axis_label[log_axis <= exp(par("usr")[4])] #log_axis <- log_axis[log_axis <= exp(par("usr")[4])] Axis(unlist(datas), side = 2, cex.axis = cex.axis, col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las) # xaxp = xaxp, yaxp = yaxp disabled for log if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ 
Axis(1:length(datas), at = at, labels = label, side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las) # xaxp = xaxp, yaxp = yaxp disabled for log } } else { Axis(unlist(datas), side = 2, cex.axis = cex.axis, col.axis = col.axis, font.axis = font.axis, mgp = mgp, yaxp = yaxp, tck = tck, tcl = tcl, las = las) if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, xaxp = xaxp, tck = tck, tcl = tcl, las = las) } } } else { if(ylog){ if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las) # xaxp = xaxp, yaxp = yaxp disabled for log } } else { if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, xaxp = xaxp, tck = tck, tcl = tcl, las = las) } } } } if (frame.plot) { box(lty = lty, lwd = lwd) } for (i in 1:n) { polygon(c(at[i] - radj*height[[i]], rev(at[i] + ladj*height[[i]])), c(base[[i]], rev(base[[i]])), col = ifelse(length(col)>1,col[1+(i-1)%%length(col)], col), border = ifelse(length(border)>1, border[1+(i-1)%%length(border)], border), lty = lty, lwd = lwd, xpd = xpd, lend = lend, ljoin = ljoin, lmitre = lmitre) if (drawRect) { lines(at[c(i, i)], c(lower[i], upper[i]), lwd = lwd, lty = lty, col = ifelse(length(lineCol)>1, lineCol[1+(i-1)%%length(lineCol)], lineCol), lend = lend, ljoin = ljoin, lmitre = lmitre) rect(at[i] - radj*ifelse(length(boxwidth)>1, boxwidth[i], boxwidth)/2, q1[i], at[i] + ladj*ifelse(length(boxwidth)>1, boxwidth[i], boxwidth)/2, q3[i], col = ifelse(length(rectCol)>1, rectCol[1+(i-1)%%length(rectCol)], rectCol), border = 
ifelse(length(lineCol)>1, lineCol[1+(i-1)%%length(lineCol)], lineCol), xpd = xpd, lend = lend, ljoin = ljoin, lmitre = lmitre) if(plotCentre == "line"){ lines(x = c(at[i] - radj*med.dens[i], at[i], at[i] + ladj*med.dens[i]), y = rep(med[i],3)) } else { points(at[i], med[i], pch = ifelse(length(pchMed)>1, pchMed[1+(i-1)%%length(pchMed)], pchMed), col = ifelse(length(colMed)>1, colMed[1+(i-1)%%length(colMed)], colMed), bg = ifelse(length(colMed2)>1, colMed2[1+(i-1)%%length(colMed2)], colMed2), cex = cex, lwd = lwd, lty = lty) } } } } else { if(log == "y" || ylog == TRUE){ log <- "x" xlog <- TRUE ylog <- FALSE } if (!add) { plot.window(ylim, xlim, log = log, asp = asp, bty = bty, cex = cex, xaxs = xaxs, yaxs = yaxs, lab = lab, mai = mai, mar = mar, mex = mex, mfcol = mfcol, mfrow = mfrow, mfg = mfg, xlog = xlog, ylog = ylog) xaxp <- par()$xaxp yaxp <- par()$yaxp if(yaxt !="n"){ if(xlog){ #log_axis_label <- log_axis_label[log_axis >= exp(par("usr")[3])] #log_axis <- log_axis[log_axis >= exp(par("usr")[3])] #log_axis_label <- log_axis_label[log_axis <= exp(par("usr")[4])] #log_axis <- log_axis[log_axis <= exp(par("usr")[4])] Axis(unlist(datas), side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las) # xaxp = xaxp, yaxp = yaxp disabled for log if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 2, cex.axis = cex.axis, col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las) # xaxp = xaxp, yaxp = yaxp disabled for log } } else { Axis(unlist(datas), side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, xaxp = xaxp, tck = tck, tcl = tcl, las = las) if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 2, cex.axis = cex.axis, col.axis = col.axis, font.axis = font.axis, mgp = mgp, yaxp = yaxp, tck = tck, tcl = tcl, las = las) } 
} } else { if(ylog){ if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, tck = tck, tcl = tcl, las = las) # xaxp = xaxp, yaxp = yaxp disabled for log } } else { if(is.null(cex.names)) cex.names <- cex.axis if(xaxt !="n"){ Axis(1:length(datas), at = at, labels = label, side = 1, cex.axis = cex.names, col.axis = col.axis, font.axis = font.axis, mgp = mgp, xaxp = xaxp, tck = tck, tcl = tcl, las = las) } } } } if (frame.plot) { box(lty = lty, lwd = lwd) } for (i in 1:n) { polygon(c(base[[i]], rev(base[[i]])), c(at[i] - radj*height[[i]], rev(at[i] + ladj*height[[i]])), col = ifelse(length(col)>1,col[1+(i-1)%%length(col)], col), border = ifelse(length(border)>1, border[1+(i-1)%%length(border)], border), lty = lty, lwd = lwd, xpd = xpd, lend = lend, ljoin = ljoin, lmitre = lmitre) if (drawRect) { lines(c(lower[i], upper[i]), at[c(i, i)], lwd = lwd, lty = lty, col = ifelse(length(lineCol)>1, lineCol[1+(i-1)%%length(lineCol)], lineCol), lend = lend, ljoin = ljoin, lmitre = lmitre) rect(q1[i], at[i] - radj*ifelse(length(boxwidth)>1, boxwidth[i], boxwidth)/2, q3[i], at[i] + ladj*ifelse(length(boxwidth)>1, boxwidth[i], boxwidth)/2, col = ifelse(length(rectCol)>1, rectCol[1+(i-1)%%length(rectCol)], rectCol), border = ifelse(length(lineCol)>1, lineCol[1+(i-1)%%length(lineCol)], lineCol), xpd = xpd, lend = lend, ljoin = ljoin, lmitre = lmitre) if(plotCentre == "line"){ lines(y = c(at[i] - radj*med.dens[i], at[i], at[i] + ladj*med.dens[i]), x = rep(med[i],3)) } else { points(med[i], at[i], pch = ifelse(length(pchMed)>1, pchMed[1+(i-1)%%length(pchMed)], pchMed), col = ifelse(length(colMed)>1, colMed[1+(i-1)%%length(colMed)], colMed), , bg = ifelse(length(colMed2)>1, colMed2[1+(i-1)%%length(colMed2)], colMed2), cex = cex, lwd = lwd, lty = lty) } } } } panel.last if (ann) { title(main = main, sub = sub, xlab = xlab, ylab = ylab, line = line, 
outer = outer, xpd = xpd, cex.main = cex.main, col.main = col.main, font.main = font.main) } invisible(list(upper = upper, lower = lower, median = med, q1 = q1, q3 = q3)) } vioplot/NEWS.md0000644000176200001440000000530014344404314013043 0ustar liggesusers
# vioplot 0.4.0 (2022)

New feature

- adds feature for histograms in dedicated function discussed on GitHub issue #15 and PR #18

Documentation

- adds vignette for histograms #18
- adds examples to overlay information with base R graphics discussed in issues #16 and #17 on GitHub
- updates documentation for h parameter #14

Bug fixes

- allow suppression of y-axes with `yaxt = 'n'` without disabling x-axes (should be independent parameter). Resolves unexpected behaviour reported on GitHub issue #16.
- allow NA values when plotting repeated values resolves bug in #13 commit bd68db3c10ee5b8a550568f449fecd1d47a62197

# vioplot 0.3.7 (2021)

Updates maintainer contact details.

# vioplot 0.3.6 (2021)

Bug fixes.

- allow plotting repeated non-unique values over threshold number with checks #13

# vioplot 0.3.5 (2020)

Bug fixes.

- allow reuse of vector inputs
- correct graphical parameters: xaxt, xlim
- correct log scales (xlog) for horizontal violins
- document axes labels for split violins

# vioplot 0.3.4 (2019)

Bug fixes.

- avoids altering base plotting parameters `par()`
- resolves issues calling log inputs without an explicit `log` parameter as text

# vioplot 0.3.3 (2019)

Minor release with improvements to passing parameters.

- improved passing of base R plotting parameters
- resolves issues with variable names and factor levels in formula inputs

# vioplot 0.3.2 (2019)

Minor release with improvements to passing parameters.
- improved handling for formula input: levels for names and variable names for axes labels
- improved passing of graphical parameters to title, and axis
- axes for log-scale are automatically generated and horizontal plots are supported

Examples for formula input added for convenience (this method is recommended).

# vioplot 0.3.1 (2019)

Minor release with continuous integration testing, improved vignettes, and License.

Compatible with GitHub and CRAN Release.

# vioplot 0.3.0 (2018)

## Major changes

- formula inputs

vioplot is now compatible with all inputs of boxplot or beanplot, including formula inputs (implemented as S3 methods).

- plot customisation

Various features of violins can be tweaked with plotting parameters, such as colours and shapes of aspects of the violin. These can be applied to all violins with a single (scalar) input or applied separately to each violin with multiple (vector) inputs

- defaults

This version is fully compatible with inputs to vioplot 0.2. The only difference in behaviour is the default colour, which changes from a glaring magenta to a monochrome grey (more appropriate in a wider range of professional settings). Code written for previous versions should run without breaking or changes in behaviour apart from the default colour.
vioplot/MD50000644000176200001440000001020714344672512012266 0ustar liggesusers2f4f8973c10ac6b494a0f04113938a13 *DESCRIPTION b0353042d8325f070abcc2c7300342b8 *LICENSE 3febfe3c4618957d27ec883001777603 *NAMESPACE bc1fbf866364bbb8e6ed0d5996156f5b *NEWS.md c1a78031547a9b3f748fd8995bf6b762 *R/histoplot.R 4317ea1652ef5015e50d6a4188884073 *R/vioplot.R d114d815b71036acfbe54e486da76cd3 *R/vioplot.stats.R 32e8e571a4ea65a362a70429b6dc1b7e *build/vignette.rds 2f850208c6b5df3b3a2a9aa6ae395aaa *inst/CITATION b5788ef1ec4d2e485a8cd39c46eeb8af *inst/COPYRIGHT e8f1f1e5ad97c461f3ac58adbd2328cf *inst/doc/histogram_customisation.R 1a101eeb0b0f1c7ee0cf494728c6dc64 *inst/doc/histogram_customisation.Rmd 573359a585853d00d7a75359765d8adc *inst/doc/histogram_customisation.html fcad88faeeead9487dcc02010bcaee03 *inst/doc/histogram_formulae.R 5555b73f7e396cad76ebd530304364b4 *inst/doc/histogram_formulae.Rmd b7cfcd8e96747325bc47fdba50e5ed10 *inst/doc/histogram_formulae.html 8fee761d21ba301877858ab91297533a *inst/doc/overlaying_annotations.R aac4ede878f5c2dcdf433bf7a72724b0 *inst/doc/overlaying_annotations.Rmd bf8c528c59ad9ad9db16a64db276f471 *inst/doc/overlaying_annotations.html b3acfff1e90c94b7f89b139ad723cce9 *inst/doc/violin_area.R 278eab2d23325e33e86e7e76dc0c1e90 *inst/doc/violin_area.Rmd d2164c5cb8f0271559cfe7413ff20a81 *inst/doc/violin_area.html a6c01ad2c1eeb46ccdb9e99a4c7cc52a *inst/doc/violin_customisation.R 61bbd69de37fa4eadb80ecb65e92ebb1 *inst/doc/violin_customisation.Rmd 076fd884252bf93ab06afadaee18ee81 *inst/doc/violin_customisation.html 9b6cefdf921ecd5baf9afbdf43756684 *inst/doc/violin_formulae.R d3ee832b7905a0b7346ad1041e85dfdb *inst/doc/violin_formulae.Rmd e00a6002dae8fde409549751e3066e27 *inst/doc/violin_formulae.html 1d8684154f5ae2e814d7bd991fc17e24 *inst/doc/violin_split.R 199645abc86916d44342b548285d067a *inst/doc/violin_split.Rmd e65ba943a858b28155c5359b1c45e4ac *inst/doc/violin_split.html 9f7b64627cf6e2d012c12c8dc7609b30 *inst/doc/violin_ylog.R 
a41c73ae1df957ee84c4ef8b91e2b36a *inst/doc/violin_ylog.Rmd b47b243772e5add78f030525a9154508 *inst/doc/violin_ylog.html 511be591b4a50c3c3720b75cb3f2cf65 *man/histoplot.Rd 7cf9a11c319c7c5250eb717164f98de3 *man/violin.stats.Rd 876486012d78c56b21b7e9fd7781d42a *man/vioplot.Rd faccc5d00425006d9beb9babf4752e44 *tests/testthat.R e752743526b314a26947d7f21035ac49 *tests/testthat/test_histoplot_customisation.R 743b98de551a2094d1a29fe16b38d941 *tests/testthat/test_histoplot_formula.R b3e870b5983833f905cb9b0a73520364 *tests/testthat/test_violin_area.R 4464bb0ca1cbedcd2d05d706f65768a5 *tests/testthat/test_violin_classes.R 737539ba9e124f0e0afe268f2981fe89 *tests/testthat/test_violin_customisation.R 32cbdc763f82e46543e055986e6ef0ae *tests/testthat/test_violin_formula.R 85993975a25f4baab807622ba6846ab0 *tests/testthat/test_violin_median.R 9078f2684fba82c384ca17dfa8d38d14 *tests/testthat/test_violin_na_handle.R 70f97529f03d7ffc9e9394752077783e *tests/testthat/test_violin_names.R a38e20901a1dba8f1721f19a4f7e7f9f *tests/testthat/test_violin_side.R f6c59f7fedeed3f7a4df9f3ab8f41eac *tests/testthat/test_violin_unequal_groups.R 5960eb19992dcae212abf234f23db83a *tests/testthat/test_ylog.R f534a569ab6e49235d90a12ff588157e *vignettes/backup/violin_area.Rmd ed82e28a7e639a71348d27caf368d27b *vignettes/backup/violin_area.html 214df39d790c0b9b7fe85e658b261a34 *vignettes/backup/violin_customisation.Rmd f7440770801b4ebb6ac7626e818acc12 *vignettes/backup/violin_customisation.html c4232529d703937acda2edbca88b77ea *vignettes/backup/violin_formulae.html 13f450a14224e623ca331c75bd1edabc *vignettes/backup/violin_split.Rmd 41127ffeefdeb4fadc1c0d9029d14052 *vignettes/backup/violin_split.html 8944fe4605b90795412173ef430e957a *vignettes/backup/violin_ylog.Rmd ad4c189d29867f0a5c820e708f2e0e90 *vignettes/backup/violin_ylog.html 1a101eeb0b0f1c7ee0cf494728c6dc64 *vignettes/histogram_customisation.Rmd 5555b73f7e396cad76ebd530304364b4 *vignettes/histogram_formulae.Rmd aac4ede878f5c2dcdf433bf7a72724b0 
*vignettes/overlaying_annotations.Rmd 278eab2d23325e33e86e7e76dc0c1e90 *vignettes/violin_area.Rmd 61bbd69de37fa4eadb80ecb65e92ebb1 *vignettes/violin_customisation.Rmd d3ee832b7905a0b7346ad1041e85dfdb *vignettes/violin_formulae.Rmd 199645abc86916d44342b548285d067a *vignettes/violin_split.Rmd a41c73ae1df957ee84c4ef8b91e2b36a *vignettes/violin_ylog.Rmd vioplot/inst/0000755000176200001440000000000014344507257012736 5ustar liggesusersvioplot/inst/doc/0000755000176200001440000000000014344507257013503 5ustar liggesusersvioplot/inst/doc/histogram_formulae.html0000644000176200001440000103713014344507252020260 0ustar liggesusers Customising Histogram Plots with Formula Input

Customising Histogram Plots with Formula Input

Tom Kelly

2022-12-09

Since boxplots have become the de facto standard for plotting the distribution of data most users are familiar with these and the formula input for dataframes. However this input is not available in the standard histoplot package. Thus it has been restored here for enhanced backwards compatibility with boxplot.

As shown below for the iris dataset, histogram plots show distribution information taking formula input that boxplot implements but histoplot is unable to. This demonstrates the customisation demonstrated in the main histoplot vignette using histoplot syntax with the formula method commonly used for boxplot, t.test, and lm.

library("vioplot")
data(iris)
boxplot(Sepal.Length~Species, data = iris)

Whereas performing the same function does not work with vioplot (0.2).

devtools::install_version("vioplot", version = "0.2")
library("vioplot")
vioplot(Sepal.Length~Species, data = iris)
Error in min(data) : invalid 'type' (language) of argument

Plot Defaults

vioplot(Sepal.Length~Species, data = iris)

Another concern we see here is that the vioplot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length")

Plot colours: Histogram Fill

Plot colours can be further customised as with the original vioplot package using the col argument:

histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue")

Vectorisation

However the vioplot (0.2) function is unable to colour each histogram separately, thus this is enabled with a vectorised col in histoplot (0.4):

histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)

Plot colours: Violin Lines and Boxplot

Colours can also be customised for the histogram fill and border separately using the col and border arguments:

histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue")

Similarly, the arguments lineCol and rectCol specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred")

The same applies to the colour of the median point with colMed:

histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet")

Combined customisation

These customised colours can be combined:

histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet")

Vectorisation

These colour and shape settings can also be customised separately for each histogram:

histoplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19))

Split Bihistogram Plots

We set up the data with two categories (Sepal Width) as follows:

data(iris)
summary(iris$Sepal.Width)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.800   3.000   3.057   3.300   4.400
table(iris$Sepal.Width > mean(iris$Sepal.Width))
## 
## FALSE  TRUE 
##    83    67
iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ]
iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ]

A direct comparison of 2 datasets can be made with the side argument and add = TRUE on the second plot:

histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right")
histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T)
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

vioplot/inst/doc/histogram_formulae.Rmd0000755000176200001440000001201114344404314020022 0ustar liggesusers--- title: "Customising Histogram Plots with Formula Input" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{histoplot: Customising Histogram Plots with Formula Input} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Since boxplots have become the _de facto_ standard for plotting the distribution of data most users are familiar with these and the formula input for dataframes. However this input is not available in the standard `histoplot` package. Thus it has been restored here for enhanced backwards compatibility with `boxplot`. As shown below for the `iris` dataset, histogram plots show distribution information taking formula input that `boxplot` implements but `histoplot` is unable to. This demonstrates the customisation demonstrated in [the main histoplot vignette using histoplot syntax](histogram_customisation.html) with the formula method commonly used for `boxplot`, `t.test`, and `lm`. ```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` Whereas performing the same function does not work with `vioplot` (0.2). 
```{r, message=FALSE, eval=FALSE} devtools::install_version("vioplot", version = "0.2") library("vioplot") vioplot(Sepal.Length~Species, data = iris) ``` ``` Error in min(data) : invalid 'type' (language) of argument ``` ## Plot Defaults ```{r, message=FALSE, eval=FALSE} vioplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta") ``` Another concern we see here is that the `vioplot` defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` ## Plot colours: Histogram Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each histogram separately, thus this is enabled with a vectorised `col` in `histoplot` (0.4): ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the histogram fill and border separately using the `col` and `border` arguments: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. 
```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet") ``` ### Combined customisation These can be customised colours can be combined: ```{r} histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each histogram: ```{r} histoplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` ## Split Bihistogram Plots We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` A direct comparision of 2 datasets can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` vioplot/inst/doc/violin_area.R0000644000176200001440000001243214344507253016114 0ustar 
liggesusers## ----------------------------------------------------------------------------- library("vioplot") ## ---- message=FALSE----------------------------------------------------------- data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ## ---- echo=FALSE, message=FALSE----------------------------------------------- par(mar=rep(1,4)) ## ----------------------------------------------------------------------------- par(mfrow=c(3, 1)) par(mar=rep(2, 4)) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green") plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue") plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4") par(mfrow=c(1, 1)) ## ---- echo=FALSE, message=FALSE----------------------------------------------- par(mar=c(5, 4, 4, 2) + 0.1) ## ---- echo=FALSE, message=FALSE----------------------------------------------- par(mar=rep(2,4)) ## ----------------------------------------------------------------------------- par(mfrow=c(3, 1)) par(mar=rep(2, 4)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4", xlim=xaxis, ylim=yaxis) par(mfrow=c(1, 1)) ## ---- echo=FALSE, message=FALSE----------------------------------------------- 
par(mar=c(5, 4, 4, 2) + 0.1) ## ----------------------------------------------------------------------------- par(mfrow=c(1, 1)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length", col="green", xlim=xaxis, ylim=yaxis) lines(density(iris$Sepal.Length[iris$Species=="versicolor"]), col="blue") lines(density(iris$Sepal.Length[iris$Species=="virginica"]), col="palevioletred4") legend("topright", fill=c("green", "blue", "palevioletred4"), legend=levels(iris$Species), cex=0.5) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = T) ## ---- echo=FALSE, message=FALSE----------------------------------------------- par(mar=rep(2, 4)) ## ----------------------------------------------------------------------------- par(mfrow=c(2,1)) par(mar=rep(2, 4)) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = F) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T) par(mfrow=c(1,1)) ## ---- echo=FALSE, message=FALSE----------------------------------------------- par(mar=c(5, 4, 4, 2) + 0.1) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, 
col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4")) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25) ## ----------------------------------------------------------------------------- vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = F, main="Equal Width", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = T, main="Equal Area", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) vioplot/inst/doc/overlaying_annotations.R0000644000176200001440000000346014344507252020420 0ustar liggesusers## ----------------------------------------------------------------------------- # generate dummy data a <- rnorm(25, 3, 0.5) b <- rnorm(25, 2, 1.0) c <- rnorm(25, 2.75, 0.25) d <- rnorm(25, 3.15, 0.375) e <- rnorm(25, 1, 0.25) datamat <- cbind(a, b, c, d, e) dim(datamat) ## ----------------------------------------------------------------------------- library("vioplot") ## ----------------------------------------------------------------------------- vioplot(datamat, ylim = 
c(0, 5)) # compute medians data.med <- apply(datamat, 2, median) data.med #overlay medians lines(data.med, lty = 2, lwd = 1.5) points(data.med, pch = 19, col = "red", cex = 2.25) ## ----------------------------------------------------------------------------- outcome <- c(rnorm(25, 3, 1), rnorm(25, 2, 0.5)) intervention <- c(rep("treatment", 25), rep("control", 25)) table(intervention) names(table(intervention)) unique(sort(intervention)) intervention <- as.factor(intervention) levels(intervention) d <- data.frame(outcome, intervention) vioplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n', main = "", xlab = "", ylab = "") axis(side = 1, at = 1:length(levels(intervention)), labels = levels(intervention)) mtext("custom x labels for intervention", side = 1) mtext("custom y labels for outcome", side = 2) title(main = "example with custom title", sub = "subtitles are supported") ## ----------------------------------------------------------------------------- histoplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n', main = "", xlab = "", ylab = "") axis(side = 1, at = 1:length(levels(intervention)), labels = levels(intervention)) mtext("custom x labels for intervention", side = 1) mtext("custom y labels for outcome", side = 2) title(main = "example with custom title", sub = "subtitles are supported") vioplot/inst/doc/overlaying_annotations.Rmd0000755000176200001440000000550314344404314020737 0ustar liggesusers--- title: "Overlaying base R graphics" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Overlaying base R graphics} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ## Introduction: Integration with base R graphics Here we demonstrate how to combine violin plots with other base R graphics. In principle any base R graphics can be overlayed on top of a violin plot for annotation. 
Many problems can be resolved by overlaying base R graphics and integrating vioplot with other plotting functions. Any additional elements can be overlayed by running commands after generating the plot. The x-axes are integer values [1,2,3,…] for each violin. The y-axes are continuous values as displayed. The following plotting elements are supported for example: points, lines, polygon It is also possible to modify plotting parameters with: title, axis, legend "vioplot()" functions similar to "plot()" and passes input arguments from "par()". ### Plotting violins with highlighted medians For example it is possible to add additional annotations. ```{r} # generate dummy data a <- rnorm(25, 3, 0.5) b <- rnorm(25, 2, 1.0) c <- rnorm(25, 2.75, 0.25) d <- rnorm(25, 3.15, 0.375) e <- rnorm(25, 1, 0.25) datamat <- cbind(a, b, c, d, e) dim(datamat) ``` ```{r} library("vioplot") ``` ```{r} vioplot(datamat, ylim = c(0, 5)) # compute medians data.med <- apply(datamat, 2, median) data.med #overlay medians lines(data.med, lty = 2, lwd = 1.5) points(data.med, pch = 19, col = "red", cex = 2.25) ``` ### Custom axes and titles It is also possible to modify the axes labels and titles as shown in this example. Here default axes are suppressed and replaced with custom parameters. 
```{r} outcome <- c(rnorm(25, 3, 1), rnorm(25, 2, 0.5)) intervention <- c(rep("treatment", 25), rep("control", 25)) table(intervention) names(table(intervention)) unique(sort(intervention)) intervention <- as.factor(intervention) levels(intervention) d <- data.frame(outcome, intervention) vioplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n', main = "", xlab = "", ylab = "") axis(side = 1, at = 1:length(levels(intervention)), labels = levels(intervention)) mtext("custom x labels for intervention", side = 1) mtext("custom y labels for outcome", side = 2) title(main = "example with custom title", sub = "subtitles are supported") ``` #### Annotated histograms This is also supported by the histogram plot. ```{r} histoplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n', main = "", xlab = "", ylab = "") axis(side = 1, at = 1:length(levels(intervention)), labels = levels(intervention)) mtext("custom x labels for intervention", side = 1) mtext("custom y labels for outcome", side = 2) title(main = "example with custom title", sub = "subtitles are supported") ``` vioplot/inst/doc/violin_formulae.R0000644000176200001440000000530214344507255017016 0ustar liggesusers## ----------------------------------------------------------------------------- library("vioplot") ## ---- message=FALSE, eval=FALSE----------------------------------------------- # data(iris) # boxplot(Sepal.Length~Species, data = iris) ## ---- message=FALSE, echo=FALSE----------------------------------------------- data(iris) boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ## ---- message=FALSE, eval=FALSE----------------------------------------------- # devtools::install_version("vioplot", version = "0.2") # library("vioplot") # vioplot(Sepal.Length~Species, data = iris) ## ---- message=FALSE, eval=FALSE----------------------------------------------- # vioplot(Sepal.Length~Species, data = iris) ## ---- message=FALSE, echo=FALSE----------------------------------------------- 
vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) 
vioplot/inst/doc/histogram_customisation.Rmd0000755000176200001440000001731014344475146021133 0ustar liggesusers--- title: "Customising Violin Plots with Histograms" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots with Histograms} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, histogram plots show distribution information that the boxplot is unable to. 
```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) library("vioplot") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ``` ## Plot Defaults However as we can see here the plot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. 
Thus the plot default colours have been changed as shown here: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` # Histogram plot Here we introduce a variant of the violin plot, using a mirrored bihistogram to show the distribution: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` ## Plot colours: Histogram Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ``` ### Vectorisation The `vioplot` (0.2) function is unable to colour each histogram separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3) and `histoplot` (0.4): ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) 
``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the histogram fill and border separately using the `col` and `border` arguments: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ``` ### Combined customisation These customised colours can be combined: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each histogram: ```{r} histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", 
"palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` ## Split Bihistogram Plots We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` A direct comparision of 2 datasets can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` vioplot/inst/doc/violin_split.R0000644000176200001440000001202314344507256016336 0ustar liggesusers## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- library("vioplot") ## ---- message=FALSE----------------------------------------------------------- data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- boxplot(Sepal.Length~Species, data=iris, col="grey") ## ---- fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'---- { par(mfrow=c(2,1)) boxplot(Sepal.Length~Species, data=iris_small, col = "lightblue") boxplot(Sepal.Length~Species, 
data=iris_large, col = "palevioletred") par(mfrow=c(1,1)) } ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris) ## ---- fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'---- { par(mfrow=c(2,1)) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line") vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line") par(mfrow=c(1,1)) } ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species))) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width") ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T, xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species))) legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width") ## ---- fig.align = 'center', fig.height = 
3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = 
"palevioletred2") points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_small[grep(species, iris_small$Species),]$Sepal.Length))), pch = 21, col = "lightblue4", bg = "lightblue2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") vioplot/inst/doc/violin_split.Rmd0000755000176200001440000001641114027763472016671 0ustar liggesusers--- title: "Split Violin Plots" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette fig_width: 6 fig_height: 3 fig_align: 'center' fig_keep: 'last' vignette: > %\VignetteIndexEntry{vioplot: Split Violin Plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. 
###General Set up ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} library("vioplot") ``` We set up the data with two categories (Sepal Width) as follows: ```{r, message=FALSE} data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ``` ###Boxplots First we plot Sepal Length on its own: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} boxplot(Sepal.Length~Species, data=iris, col="grey") ``` An indirect comparison can be achieved with par: ```{r, fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'} { par(mfrow=c(2,1)) boxplot(Sepal.Length~Species, data=iris_small, col = "lightblue") boxplot(Sepal.Length~Species, data=iris_large, col = "palevioletred") par(mfrow=c(1,1)) } ``` ### Violin Plots First we plot Sepal Length on its own: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris) ``` An indirect comparison can be achieved with par: ```{r, fig.align = 'center', fig.height = 6, fig.width = 6, fig.keep = 'last'} { par(mfrow=c(2,1)) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line") vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line") par(mfrow=c(1,1)) } ``` ### Split Violin Plots A more direct comparision can be made with the `side` argument and `add = TRUE` on the second plot: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", 
"large"), title = "Sepal Width") ``` #### Custom axes labels Custom axes labels are supported for split violin plots. However, you must use these arguments on the *first* call of `vioplot`. ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species))) vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width") ``` Note that this is disabled for the second `vioplot` call to avoid overlaying labels. ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T, xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species))) legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width") ``` #### Median The line median option is more suitable for side by side comparisions but the point option is still available also: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") 
``` It may be necessary to include a `points` command to fix the median being overwritten by the following plots: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") ``` Similarly points could be added where a line has been used previously: ```{r, fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'} vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2") vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T) points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2") points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_small[grep(species, iris_small$Species),]$Sepal.Length))), pch = 21, col = "lightblue4", bg = "lightblue2") title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", 
"large"), title = "Sepal Width") ``` Here it is aesthetically pleasing and intuitive to interpret categorical differences in mean and variation in a continuous variable. #### Sources These extensions to `vioplot` here are based on those provided here: * https://gist.github.com/mbjoseph/5852613 These have previously been discussed on the following sites: * https://mbjoseph.github.io/posts/2018-12-23-split-violin-plots/ * http://tagteam.harvard.edu/hub_feeds/1981/feed_items/209875 * [https://www.r-bloggers.com/split-violin-plots/](https://www.r-bloggers.com/2013/06/split-violin-plots/) vioplot/inst/doc/violin_ylog.Rmd0000755000176200001440000001031614027267745016510 0ustar liggesusers--- title: "Controlling y-axis Plotting" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Controlling y-axis Plotting} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. 
Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. ```{r} library("vioplot") ``` ```{r, message=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ##Violin y-axis ###Logarithmic scale However the existing violin plot packages (such as \code{\link[vioplot]{vioplot}}) do not support log-scale of the y-axis. This has been amended with the `ylog` argument. ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, ylim=c(log(1), log(10))) ``` This can also be invoked with the `log="y"` argument compatible with `boxplot`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T, ylim=c(log(1), log(10))) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "y", ylim=c(log(1), log(10))) ``` ###custom y-axes The y-axes can also be removed with `yaxt="n"` to enable customised y-axes: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], 
iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, yaxt="n", ylim=c(log(1), log(10))) ``` Thus custom axes can be added to violin plots. As shown on a linear scale: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") axis(2, at=1:10, labels=1:10) ``` As well as for on a log scale: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n", log="y", ylim=c(log(4), log(9))) axis(2, at=log(1:10), labels=1:10) ``` vioplot/inst/doc/violin_split.html0000644000176200001440000137326714344507256017126 0ustar liggesusers Split Violin Plots

Split Violin Plots

Tom Kelly

2022-12-09

Violin Plots

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

General Set up

library("vioplot")

We set up the data with two categories (Sepal Width) as follows:

data(iris)
summary(iris$Sepal.Width)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.800   3.000   3.057   3.300   4.400
table(iris$Sepal.Width > mean(iris$Sepal.Width))
## 
## FALSE  TRUE 
##    83    67
iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ]
iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ]

Boxplots

First we plot Sepal Length on its own:

boxplot(Sepal.Length~Species, data=iris, col="grey")

An indirect comparison can be achieved with par:

{
  par(mfrow=c(2,1))
boxplot(Sepal.Length~Species, data=iris_small, col = "lightblue")
boxplot(Sepal.Length~Species, data=iris_large, col = "palevioletred")
par(mfrow=c(1,1))
}

Violin Plots

First we plot Sepal Length on its own:

vioplot(Sepal.Length~Species, data=iris)

An indirect comparison can be achieved with par:

{
  par(mfrow=c(2,1))
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line")
vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line")
par(mfrow=c(1,1))
}

Split Violin Plots

A more direct comparison can be made with the side argument and add = TRUE on the second plot:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T)
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

Custom axes labels

Custom axes labels are supported for split violin plots. However, you must use these arguments on the first call of vioplot.

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species)))
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T)
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width")

Note that this is disabled for the second vioplot call to avoid overlaying labels.

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T, xlab = "Iris species", ylab = "Length", main = "Sepals", names=paste("Iris", levels(iris$Species)))
## Warning in vioplot.formula(Sepal.Length ~ Species, data = iris_small, col = "lightblue", : Warning: names can only be changed on first call of vioplot (when add = FALSE)
## Warning in vioplot.formula(Sepal.Length ~ Species, data = iris_small, col = "lightblue", : Warning: x-axis labels can only be changed on first call of vioplot (when add = FALSE)
## Warning in vioplot.formula(Sepal.Length ~ Species, data = iris_small, col = "lightblue", : Warning: y-axis labels can only be changed on first call of vioplot (when add = FALSE)
## Warning in vioplot.default(x, ...): Warning: names can only be changed on first call of vioplot (when add = FALSE)
## Warning in vioplot.default(x, ...): Warning: main title can only be changed on first call of vioplot (when add = FALSE)
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Width")

Median

The line median option is more suitable for side-by-side comparisons, but the point option is also still available:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T)
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

It may be necessary to include a points command to fix the median being overwritten by the following plots:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "point", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "point", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T)
points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2")
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

Similarly points could be added where a line has been used previously:

vioplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right", pchMed = 21, colMed = "palevioletred4", colMed2 = "palevioletred2")
vioplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", pchMed = 21, colMed = "lightblue4", colMed2 = "lightblue2", add = T)
points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_large[grep(species, iris_large$Species),]$Sepal.Length))), pch = 21, col = "palevioletred4", bg = "palevioletred2")
points(1:length(levels(iris$Species)), as.numeric(sapply(levels(iris$Species), function(species) median(iris_small[grep(species, iris_small$Species),]$Sepal.Length))), pch = 21, col = "lightblue4", bg = "lightblue2")
title(xlab = "Species", ylab = "Sepal Length")
legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width")

Here it is aesthetically pleasing and intuitive to interpret categorical differences in mean and variation in a continuous variable.

Sources

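These extensions to vioplot are based on those provided here:

  • https://gist.github.com/mbjoseph/5852613

These have previously been discussed on the following sites:

  • https://mbjoseph.github.io/posts/2018-12-23-split-violin-plots/
  • http://tagteam.harvard.edu/hub_feeds/1981/feed_items/209875
  • https://www.r-bloggers.com/2013/06/split-violin-plots/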

vioplot/inst/doc/violin_area.html0000644000176200001440000110553214344507254016665 0ustar liggesusers Controlling Violin Plot Area

Controlling Violin Plot Area

Tom Kelly

2022-12-09

While boxplots have become the de facto standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

Violin Plots

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

library("vioplot")
data(iris)
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

Violin Plot Area

However, there are concerns that existing violin plot packages (such as vioplot) scale the data to the most aesthetically suitable width rather than maintaining proportions comparable across data sets. Consider the differing distributions shown below:

par(mfrow=c(3, 1))
par(mar=rep(2, 4))
plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green")
plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue")
plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4")

par(mfrow=c(1, 1))

Comparing datasets

Neither of the plots above shows the relative distributions on the same scale; even if we match the x-axis of the density plots, the relative heights are obscured and difficult to compare.

par(mfrow=c(3, 1))
par(mar=rep(2, 4))
xaxis <- c(3, 9)
yaxis <- c(0, 1.25)
plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green", xlim=xaxis, ylim=yaxis)
plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue", xlim=xaxis, ylim=yaxis)
plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4", xlim=xaxis, ylim=yaxis)

par(mfrow=c(1, 1))

This can somewhat be addressed by overlaying density plots:

par(mfrow=c(1, 1))
xaxis <- c(3, 9)
yaxis <- c(0, 1.25)
plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length", col="green", xlim=xaxis, ylim=yaxis)
lines(density(iris$Sepal.Length[iris$Species=="versicolor"]), col="blue")
lines(density(iris$Sepal.Length[iris$Species=="virginica"]), col="palevioletred4")
legend("topright", fill=c("green", "blue", "palevioletred4"), legend=levels(iris$Species), cex=0.5)

This has the benefit of highlighting the different distributions of the data subsets. However, notice here that a figure legend becomes necessary, plot axis limits need to be defined to display the range of all distribution curves, and the plot quickly becomes cluttered if the number of factors to be compared becomes much larger.

Area control in Violin plot

Therefore the areaEqual parameter has been added to customise the violin plot to serve a similar purpose:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = T)

If we compare this to the original vioplot functionality (defaulting to areaEqual = FALSE) the differences between the two are clear.

par(mfrow=c(2,1))
par(mar=rep(2, 4))
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = F)
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T)

par(mfrow=c(1,1))

Note that areaEqual considers the full area of the density distribution before removing the outlier tails. We leave it to the user's discretion which option to use. The areaEqual functionality is compatible with all of the customisation discussed in the main vioplot vignette.

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4"))

The violin width can further be scaled with wex, which maintains the proportions across the datasets if areaEqual = TRUE:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25)

Comparing distributions

Notice the utility of areaEqual for cases where different datasets have different underlying distributions:

# Six simulated samples from different distributions, drawn first with the
# default equal-width scaling and then with equal-area scaling.
# The samples are redrawn for each call (no seed is set), so the two plots
# show different random draws from the same distributions.
vioplot(
  rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4),
  rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5),
  areaEqual = FALSE,
  main = "Equal Width", xlab = "distribution", ylab = "data value",
  names = c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")
)

vioplot(
  rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4),
  rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5),
  areaEqual = TRUE,
  main = "Equal Area", xlab = "distribution", ylab = "data value",
  names = c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")
)

vioplot/inst/doc/violin_area.Rmd0000755000176200001440000001714114027267745016451 0ustar liggesusers--- title: "Controlling Violin Plot Area" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Controlling Violin Plot Area} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. ##Violin Plots Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. 
```{r} library("vioplot") ``` ```{r, message=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ##Violin Plot Area However there are concerns that existing violin plot packages (such as \code{\link[vioplot]{vioplot}}) scales the data to the most aesthetically suitable width rather than maintaining proportions comparable across data sets. Consider the differing distributions shown below: ```{r, echo=FALSE, message=FALSE} par(mar=rep(1,4)) ``` ```{r} par(mfrow=c(3, 1)) par(mar=rep(2, 4)) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green") plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue") plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4") par(mfrow=c(1, 1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` #Comparing datasets Neither of these plots above show the relative distribtions on the same scale, even if we match the x-axis of a density plot the relative heights are obscured and difficult to compare. 
```{r, echo=FALSE, message=FALSE} par(mar=rep(2,4)) ``` ```{r} par(mfrow=c(3, 1)) par(mar=rep(2, 4)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length: setosa", col="green", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="versicolor"]), main="Sepal Length: versicolor", col="blue", xlim=xaxis, ylim=yaxis) plot(density(iris$Sepal.Length[iris$Species=="virginica"]), main="Sepal Length: virginica", col="palevioletred4", xlim=xaxis, ylim=yaxis) par(mfrow=c(1, 1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` This can somewhat be addressed by overlaying density plots: ```{r} par(mfrow=c(1, 1)) xaxis <- c(3, 9) yaxis <- c(0, 1.25) plot(density(iris$Sepal.Length[iris$Species=="setosa"]), main="Sepal Length", col="green", xlim=xaxis, ylim=yaxis) lines(density(iris$Sepal.Length[iris$Species=="versicolor"]), col="blue") lines(density(iris$Sepal.Length[iris$Species=="virginica"]), col="palevioletred4") legend("topright", fill=c("green", "blue", "palevioletred4"), legend=levels(iris$Species), cex=0.5) ``` This has the benefit of highlighting the different distributions of the data subsets. However, notice here that a figure legend become necessary, plot axis limits need to be defined to display the range of all distribution curves, and the plot quickly becomes cluttered if the number of factors to be compared becomes much larger. ##Area control in Violin plot Therefore the `areaEqual` parameter has been added to customise the violin plot to serve a similar purpose: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", areaEqual = T) ``` If we compare this to the original vioplot functionality (defaulting to `areaEqual = FALSE`) the differences between the two are clear. 
```{r, echo=FALSE, message=FALSE} par(mar=rep(2, 4)) ``` ```{r} par(mfrow=c(2,1)) par(mar=rep(2, 4)) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Width)", areaEqual = F) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T) par(mfrow=c(1,1)) ``` ```{r, echo=FALSE, message=FALSE} par(mar=c(5, 4, 4, 2) + 0.1) ``` Note that `areaEqual` is considering the full area of the density distribution before removing the outlier tails. We leave it up to the users discretion which they elect to use. The `areaEqual` functionality is compatible with all of the customisation used in discussed in [the main vioplot vignette](violin_customisation.html) ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), border=c("darkolivegreen4", "royalblue4", "violetred4")) ``` The violin width can further be scaled with `wex`, which maintains the proportions across the datasets if `areaEqual = TRUE`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), rectCol=c("green", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), 
border=c("darkolivegreen4", "royalblue4", "violetred4"), wex=1.25) ``` ## Comparing distributions Notice the utility of `areaEqual` for cases where different datasets have different underlying distributions: ```{r} vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = F, main="Equal Width", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) vioplot(rnorm(200, 3, 0.5), rpois(200, 2.5), rbinom(100, 10, 0.4), rlnorm(200, 0, 0.5), rnbinom(200, 10, 0.9), rlogis(20, 0, 0.5), areaEqual = T, main="Equal Area", xlab="distribution", ylab="data value", names=c("normal", "poisson", "binomial", "log-normal", "neg-binomial", "logistic")) ``` vioplot/inst/doc/violin_formulae.html0000644000176200001440000103775514344507255017603 0ustar liggesusers Customising Violin Plots with Formula Input

Customising Violin Plots with Formula Input

Tom Kelly

2022-12-09

Since boxplots have become the de facto standard for plotting the distribution of data most users are familiar with these and the formula input for dataframes. However this input is not available in the standard vioplot package. Thus it has been restored here for enhanced backwards compatibility with boxplot.

As shown below for the iris dataset, violin plots can now take the formula input that boxplot implements but the original vioplot (0.2) does not. This vignette applies the customisation demonstrated in the main vioplot vignette, using vioplot syntax with the formula method commonly used for boxplot, t.test, and lm.

library("vioplot")
data(iris)
boxplot(Sepal.Length~Species, data = iris)

Whereas performing the same function does not work with vioplot (0.2).

devtools::install_version("vioplot", version = "0.2")
library("vioplot")
vioplot(Sepal.Length~Species, data = iris)
Error in min(data) : invalid 'type' (language) of argument

Plot Defaults

vioplot(Sepal.Length~Species, data = iris)

Another concern we see here is that the vioplot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length")

Plot colours: Violin Fill

Plot colours can be further customised as with the original vioplot package using the col argument:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue")

Vectorisation

However the vioplot (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised col in vioplot (0.3):

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)

Plot colours: Violin Lines and Boxplot

Colours can also be customised for the violin fill and border separately using the col and border arguments:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue")

Similarly, the arguments lineCol and rectCol specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred")

The same applies to the colour of the median point with colMed:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet")

### Combined customisation

These customised colours can be combined:

vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet")

Vectorisation

These colour and shape settings can also be customised separately for each violin:

vioplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19))

vioplot/inst/doc/violin_ylog.html0000644000176200001440000043372614344507257016742 0ustar liggesusers Controlling y-axis Plotting

Controlling y-axis Plotting

Tom Kelly

2022-12-09

While boxplots have become the de facto standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

##Violin Plots

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

library("vioplot")
data(iris)
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

##Violin y-axis

###Logarithmic scale

However the existing violin plot packages (such as the original vioplot) do not support a log-scale y-axis. This has been amended with the ylog argument.

# Violin plot of sepal length per species on a logarithmic y-axis (ylog).
# NOTE(review): the warning rendered below reports GScale(-inf, 0.362216),
# which is log10() of these ylim values, so ylim appears to be read in data
# units on the log axis. Confirm whether ylim = c(1, 10) was intended.
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  # TRUE is spelled out: T is an ordinary variable and can be reassigned.
  ylog = TRUE,
  ylim = c(log(1), log(10))
)
## Warning in plot.window(xlim, ylim, log = log, asp = asp, bty = bty, cex = cex, :
## nonfinite axis=2 limits [GScale(-inf,0.362216,..); log=TRUE] -- corrected now

## Warning in plot.window(xlim, ylim, log = log, asp = asp, bty = bty, cex = cex, :
## nonfinite axis=2 limits [GScale(-inf,0.362216,..); log=TRUE] -- corrected now

This can also be invoked with the log="y" argument compatible with boxplot:

# The same log-scale plot requested through the boxplot-style log argument,
# first as a logical and then as the axis name "y".
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  # TRUE is spelled out: T is an ordinary variable and can be reassigned.
  log = TRUE,
  ylim = c(log(1), log(10))
)
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  log = "y",
  ylim = c(log(1), log(10))
)

###custom y-axes

The y-axes can also be removed with yaxt="n" to enable customised y-axes:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n")

# Log-scale violin plot with the default y-axis suppressed (yaxt = "n") so a
# custom axis can be added afterwards.
# NOTE(review): the warning rendered below reports GScale(-inf, 0.362216),
# which is log10() of these ylim values, so ylim appears to be read in data
# units on the log axis. Confirm whether ylim = c(1, 10) was intended.
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  # TRUE is spelled out: T is an ordinary variable and can be reassigned.
  ylog = TRUE,
  yaxt = "n",
  ylim = c(log(1), log(10))
)
## Warning in plot.window(xlim, ylim, log = log, asp = asp, bty = bty, cex = cex, :
## nonfinite axis=2 limits [GScale(-inf,0.362216,..); log=TRUE] -- corrected now

## Warning in plot.window(xlim, ylim, log = log, asp = asp, bty = bty, cex = cex, :
## nonfinite axis=2 limits [GScale(-inf,0.362216,..); log=TRUE] -- corrected now

Thus custom axes can be added to violin plots. As shown on a linear scale:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n")
axis(2, at=1:10, labels=1:10)

As well as on a log scale:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n", log="y", ylim=c(log(4), log(9)))
axis(2, at=log(1:10), labels=1:10)

vioplot/inst/doc/histogram_customisation.html0000644000176200001440000106343214344507251021352 0ustar liggesusers Customising Violin Plots with Histograms

Customising Violin Plots with Histograms

Tom Kelly

2022-12-09

While boxplots have become the de facto standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, histogram plots show distribution information that the boxplot is unable to.

library("vioplot")
## Loading required package: sm
## Package 'sm', version 2.2-5.7: type help(sm) for summary information
## Loading required package: zoo
## 
## Attaching package: 'zoo'
## The following objects are masked from 'package:base':
## 
##     as.Date, as.Date.numeric
data(iris)
boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))
library("vioplot")
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"))

Plot Defaults

However as we can see here the plot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length")

Histogram plot

Here we introduce a variant of the violin plot, using a mirrored bihistogram to show the distribution:

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length")

Plot colours: Histogram Fill

Plot colours can be further customised as with the original vioplot package using the col argument:

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue")

Vectorisation

The vioplot (0.2) function is unable to colour each histogram separately, thus this is enabled with a vectorised col in vioplot (0.3) and histoplot (0.4):

vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"))
legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5)

Plot colours: Violin Lines and Boxplot

Colours can also be customised for the histogram fill and border separately using the col and border arguments:

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue")

Similarly, the arguments lineCol and rectCol specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred")

The same applies to the colour of the median point with colMed:

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet")

### Combined customisation

These customised colours can be combined:

histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet")

Vectorisation

These colour and shape settings can also be customised separately for each histogram:

# Equal-area histogram plot with every colour and the median symbol set
# separately for each species.
histoplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length (Equal Area)",
  # TRUE is spelled out: T is an ordinary variable and can be reassigned.
  areaEqual = TRUE,
  col = c("lightgreen", "lightblue", "palevioletred"),
  border = c("darkolivegreen4", "royalblue4", "violetred4"),
  rectCol = c("forestgreen", "blue", "palevioletred3"),
  lineCol = c("darkolivegreen", "royalblue", "violetred4"),
  colMed = c("green", "cyan", "magenta"),
  pchMed = c(15, 17, 19)
)

Split Bihistogram Plots

We set up the data with two categories (Sepal Width) as follows:

data(iris)
summary(iris$Sepal.Width)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##   2.000   2.800   3.000   3.057   3.300   4.400
table(iris$Sepal.Width > mean(iris$Sepal.Width))
## 
## FALSE  TRUE 
##    83    67
# Split iris into flowers whose sepal width is above the overall mean
# ("large") and the rest ("small"). The comparison is evaluated once and the
# same mask drives both subsets, so the two groups stay complementary.
is_wide_sepal <- iris$Sepal.Width > mean(iris$Sepal.Width)
iris_large <- iris[is_wide_sepal, ]
iris_small <- iris[!is_wide_sepal, ]

A direct comparison of 2 datasets can be made with the side argument and add = TRUE on the second plot:

# Split bihistogram: the large-sepal-width group is drawn on the right half of
# each position and the small-sepal-width group on the left half.
histoplot(
  Sepal.Length ~ Species,
  data = iris_large,
  col = "palevioletred",
  plotCentre = "line",
  side = "right"
)
# add = TRUE overlays the second group on the plot created above.
# TRUE is spelled out: T is an ordinary variable and can be reassigned.
histoplot(
  Sepal.Length ~ Species,
  data = iris_small,
  col = "lightblue",
  plotCentre = "line",
  side = "left",
  add = TRUE
)
title(xlab = "Species", ylab = "Sepal Length")
legend(
  "topleft",
  fill = c("lightblue", "palevioletred"),
  legend = c("small", "large"),
  title = "Sepal Width"
)

vioplot/inst/doc/histogram_customisation.R0000644000176200001440000001315714344507251020605 0ustar liggesusers## ----------------------------------------------------------------------------- library("vioplot") ## ---- message=FALSE, eval=FALSE----------------------------------------------- # data(iris) # boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) # library("vioplot") # vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ## ---- message=FALSE, echo=FALSE----------------------------------------------- data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], 
iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], 
iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ## ----------------------------------------------------------------------------- histoplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ## ---- message=FALSE----------------------------------------------------------- data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") vioplot/inst/doc/violin_ylog.R0000644000176200001440000000526314344507257016166 0ustar liggesusers## 
----------------------------------------------------------------------------- library("vioplot") ## ---- message=FALSE----------------------------------------------------------- data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, ylim=c(log(1), log(10))) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = T, ylim=c(log(1), log(10))) vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", log = "y", ylim=c(log(1), log(10))) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", ylog = T, 
yaxt="n", ylim=c(log(1), log(10))) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n") axis(2, at=1:10, labels=1:10) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length", yaxt="n", log="y", ylim=c(log(4), log(9))) axis(2, at=log(1:10), labels=1:10) vioplot/inst/doc/overlaying_annotations.html0000644000176200001440000026212114344507253021165 0ustar liggesusers Overlaying base R graphics

Overlaying base R graphics

Tom Kelly

2022-12-09

Introduction: Integration with base R graphics

Here we demonstrate how to combine violin plots with other base R graphics. In principle, any base R graphics can be overlaid on top of a violin plot for annotation.

Many problems can be resolved by overlaying base R graphics and integrating vioplot with other plotting functions. Any additional elements can be overlaid by running commands after generating the plot. On the x-axis, the violins are positioned at integer values [1,2,3,…], one per violin. The y-axis is continuous, with values as displayed.

For example, the following plotting elements are supported: points, lines, polygon

It is also possible to modify plotting parameters with: title, axis, legend

“vioplot()” functions similarly to “plot()” and passes on input arguments from “par()”.

Plotting violins with highlighted medians

For example it is possible to add additional annotations.

# generate dummy data: five samples of 25 normal draws each
# (no seed is set, so the printed values below differ between runs)
a <- rnorm(25, 3, 0.5)
b <- rnorm(25, 2, 1.0)
c <- rnorm(25, 2.75, 0.25)
d <- rnorm(25, 3.15, 0.375)
e <- rnorm(25, 1, 0.25)
# bind the samples column-wise; the columns take the variable names a-e
datamat <- cbind(a, b, c, d, e)
dim(datamat)
## [1] 25  5
library("vioplot")
# plot the matrix as violins on a fixed y range
vioplot(datamat, ylim = c(0, 5))
# compute medians (one per column of datamat)
data.med <- apply(datamat, 2, median)
data.med
##         a         b         c         d         e 
## 2.9040581 1.3545198 2.8318172 3.1564032 0.9491974
# overlay medians: a single vector is plotted against its index 1..5,
# which matches the integer x positions of the violins
lines(data.med, lty = 2, lwd = 1.5)
points(data.med, pch = 19, col = "red", cex = 2.25)

Custom axes and titles

It is also possible to modify the axis labels and titles, as shown in this example. Here the default axes are suppressed and replaced with custom ones.

# simulate 25 "treatment" outcomes followed by 25 "control" outcomes
outcome <- c(rnorm(n = 25, mean = 3, sd = 1), rnorm(n = 25, mean = 2, sd = 0.5))
intervention <- rep(c("treatment", "control"), each = 25)
table(intervention)
## intervention
##   control treatment 
##        25        25
names(table(intervention))
## [1] "control"   "treatment"
unique(sort(intervention))
## [1] "control"   "treatment"
# convert the labels to a factor; levels are sorted alphabetically
intervention <- factor(intervention)
levels(intervention)
## [1] "control"   "treatment"
# combine response and grouping factor for the formula interface
d <- data.frame(outcome, intervention)
# Draw the violins with the default axes, title and axis labels suppressed,
# then add custom ones with base graphics calls.
vioplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n',
        main = "", xlab = "", ylab = "")
# one tick per factor level; seq_along() stays correct for zero levels,
# whereas 1:length() would produce c(1, 0)
axis(side = 1, at = seq_along(levels(intervention)), labels = levels(intervention))
# margin text on the bottom (side 1) and left (side 2) of the plot
mtext("custom x labels for intervention", side = 1)
mtext("custom y labels for outcome", side = 2)
title(main = "example with custom title", sub = "subtitles are supported")

Annotated histograms

This is also supported by the histogram plot.

# Draw the histogram plot with the default axes, title and axis labels
# suppressed, then add custom ones with base graphics calls.
histoplot(outcome ~ intervention, data = d, xaxt = 'n', yaxt = 'n',
        main = "", xlab = "", ylab = "")
# one tick per factor level; seq_along() stays correct for zero levels,
# whereas 1:length() would produce c(1, 0)
axis(side = 1, at = seq_along(levels(intervention)), labels = levels(intervention))
# margin text on the bottom (side 1) and left (side 2) of the plot
mtext("custom x labels for intervention", side = 1)
mtext("custom y labels for outcome", side = 2)
title(main = "example with custom title", sub = "subtitles are supported")

vioplot/inst/doc/histogram_formulae.R0000644000176200001440000000671114344507252017515 0ustar liggesusers## ----------------------------------------------------------------------------- library("vioplot") ## ---- message=FALSE, eval=FALSE----------------------------------------------- # data(iris) # boxplot(Sepal.Length~Species, data = iris) ## ---- message=FALSE, echo=FALSE----------------------------------------------- data(iris) boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ## ---- message=FALSE, eval=FALSE----------------------------------------------- # devtools::install_version("vioplot", version = "0.2") # library("vioplot") # vioplot(Sepal.Length~Species, data = iris) ## ---- message=FALSE, eval=FALSE----------------------------------------------- # vioplot(Sepal.Length~Species, data = iris) ## ---- message=FALSE, echo=FALSE----------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta") ## ----------------------------------------------------------------------------- vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue") ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue") ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", 
lineCol="violetred") ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet") ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ## ----------------------------------------------------------------------------- histoplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ## ---- message=FALSE----------------------------------------------------------- data(iris) summary(iris$Sepal.Width) table(iris$Sepal.Width > mean(iris$Sepal.Width)) iris_large <- iris[iris$Sepal.Width > mean(iris$Sepal.Width), ] iris_small <- iris[iris$Sepal.Width <= mean(iris$Sepal.Width), ] ## ---- fig.align = 'center', fig.height = 3, fig.width = 6, fig.keep = 'last'---- histoplot(Sepal.Length~Species, data=iris_large, col = "palevioletred", plotCentre = "line", side = "right") histoplot(Sepal.Length~Species, data=iris_small, col = "lightblue", plotCentre = "line", side = "left", add = T) title(xlab = "Species", ylab = "Sepal Length") legend("topleft", fill = c("lightblue", "palevioletred"), legend = c("small", "large"), title = "Sepal Width") vioplot/inst/doc/violin_customisation.html0000644000176200001440000073524514344507254020667 0ustar liggesusers Customising Violin Plots

Customising Violin Plots

Tom Kelly

2022-12-09

While boxplots have become the de facto standard for plotting the distribution of data, this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian “Normal” distribution that most researchers have become accustomed to.

While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience.

Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits:

  • Greater flexibility for plotting variation than boxplots
  • More familiarity to boxplot users than density plots
  • Easier to directly compare data types than existing plots

As shown below for the iris dataset, violin plots show distribution information that the boxplot is unable to.

# load the package and the example data
library("vioplot")
data(iris)
# box plot of sepal length, one box per iris species
boxplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica")
)
library("vioplot")
# the same three samples drawn as violins
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica")
)

Plot Defaults

However as we can see here the plot defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here:

# sepal length per species with the package's default colours
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length"
)

Plot colours: Violin Fill

Plot colours can be further customised as with the original vioplot package using the col argument:

# a single fill colour applied to every violin via `col`
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  col = "lightblue"
)

Vectorisation

However the vioplot (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised col in vioplot (0.3):

# one fill colour per violin, passed as a vector to `col`
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  col = c("lightgreen", "lightblue", "palevioletred")
)
# legend listing the same species and fill colours
legend(
  "topleft",
  legend = c("setosa", "versicolor", "virginica"),
  fill = c("lightgreen", "lightblue", "palevioletred"),
  cex = 0.5
)

Plot colours: Violin Lines and Boxplot

Colours can also be customised for the violin fill and border separately using the col and border arguments:

# fill (`col`) and outline (`border`) colours set separately
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  col = "lightblue",
  border = "royalblue"
)

Similarly, the arguments lineCol and rectCol specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour.

# colours of the inner boxplot: rectangle fill and outline
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  rectCol = "palevioletred",
  lineCol = "violetred"
)

The same applies to the colour of the median point with colMed:

# colour of the median point
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  colMed = "violet"
)

Combined customisation

These customised colours can be combined:

# all of the colour arguments above used together
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length",
  col = "lightblue",
  border = "royalblue",
  rectCol = "palevioletred",
  lineCol = "violetred",
  colMed = "violet"
)

Vectorisation

These colour and shape settings can also be customised separately for each violin:

# Per-violin customisation: every colour argument and the median symbol
# (`pchMed`) is given as a vector with one entry per violin.
# `areaEqual` is spelled TRUE rather than T, because T is an ordinary
# variable that can be reassigned.
vioplot(
  iris$Sepal.Length[iris$Species == "setosa"],
  iris$Sepal.Length[iris$Species == "versicolor"],
  iris$Sepal.Length[iris$Species == "virginica"],
  names = c("setosa", "versicolor", "virginica"),
  main = "Sepal Length (Equal Area)",
  areaEqual = TRUE,
  col = c("lightgreen", "lightblue", "palevioletred"),
  border = c("darkolivegreen4", "royalblue4", "violetred4"),
  rectCol = c("forestgreen", "blue", "palevioletred3"),
  lineCol = c("darkolivegreen", "royalblue", "violetred4"),
  colMed = c("green", "cyan", "magenta"),
  pchMed = c(15, 17, 19)
)

This should be sufficient to customise the violin plot but further examples are given in the areaEqual vioplot vignette including how violin plots are useful for comparing variation when data does not follow the same distribution. This document also compares the violin plot with other established methods to plot data variation.

vioplot/inst/doc/violin_formulae.Rmd0000755000176200001440000001011014027267745017340 0ustar liggesusers--- title: "Customising Violin Plots with Formula Input" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots with Formula Input} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Since boxplots have become the _de facto_ standard for plotting the distribution of data most users are familiar with these and the formula input for dataframes. However this input is not available in the standard `vioplot` package. Thus it has been restored here for enhanced backwards compatibility with `boxplot`. As shown below for the `iris` dataset, violin plots show distribution information taking formula input that `boxplot` implements but `vioplot` is unable to. This demonstrates the customisation demonstrated in [the main vioplot vignette using vioplot syntax](violin_customisation.html) with the formula method commonly used for `boxplot`, `t.test`, and `lm`. ```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` Whereas performing the same function does not work with `vioplot` (0.2). ```{r, message=FALSE, eval=FALSE} devtools::install_version("vioplot", version = "0.2") library("vioplot") vioplot(Sepal.Length~Species, data = iris) ``` ``` Error in min(data) : invalid 'type' (language) of argument ``` ## Plot Defaults ```{r, message=FALSE, eval=FALSE} vioplot(Sepal.Length~Species, data = iris) ``` ```{r, message=FALSE, echo=FALSE} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="magenta") ``` Another concern we see here is that the `vioplot` defaults are not aesthetically pleasing, with a rather glaring colour scheme unsuitable for professional or academic usage. 
Thus the plot default colours have been changed as shown here: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length") ``` ## Plot colours: Violin Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3): ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colours of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. 
```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", colMed="violet") ``` ### Combined customisation These can be customised colours can be combined: ```{r} vioplot(Sepal.Length~Species, data = iris, main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These colour and shape settings can also be customised separately for each violin: ```{r} vioplot(Sepal.Length~Species, data = iris, main="Sepal Length", col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` vioplot/inst/doc/violin_customisation.R0000644000176200001440000001014514344507254020105 0ustar liggesusers## ----------------------------------------------------------------------------- library("vioplot") ## ---- message=FALSE, eval=FALSE----------------------------------------------- # data(iris) # boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) # library("vioplot") # vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ## ---- message=FALSE, echo=FALSE----------------------------------------------- data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") 
vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ## 
----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ## ----------------------------------------------------------------------------- vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) vioplot/inst/doc/violin_customisation.Rmd0000755000176200001440000001453314027267745020444 0ustar liggesusers--- title: "Customising Violin Plots" author: "Tom Kelly" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{vioplot: Customising Violin Plots} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- While boxplots have become the _de facto_ standard for plotting the distribution of data this is a vast oversimplification and may not show everything needed to evaluate the variation of data. This is particularly important for datasets which do not form a Gaussian "Normal" distribution that most researchers have become accustomed to. 
While density plots are helpful in this regard, they can be less aesthetically pleasing than boxplots and harder to interpret for those familiar with boxplots. Often the only ways to compare multiple data types with density use slices of the data with faceting the plotting panes or overlaying density curves with colours and a legend. This approach is jarring for new users and leads to cluttered plots difficult to present to a wider audience. Therefore violin plots are a powerful tool to assist researchers to visualise data, particularly in the quality checking and exploratory parts of an analysis. Violin plots have many benefits: - Greater flexibility for plotting variation than boxplots - More familiarity to boxplot users than density plots - Easier to directly compare data types than existing plots As shown below for the `iris` dataset, violin plots show distribution information that the boxplot is unable to. ```{r} library("vioplot") ``` ```{r, message=FALSE, eval=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) library("vioplot") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica")) ``` ```{r, message=FALSE, echo=FALSE} data(iris) boxplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="magenta") ``` ## Plot Defaults However as we can see here the plot defaults are not aesthetically pleasing, with a 
rather glaring colour scheme unsuitable for professional or academic usage. Thus the plot default colours have been changed as shown here: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length") ``` ## Plot colours: Violin Fill Plot colours can be further customised as with the original vioplot package using the `col` argument: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue") ``` ### Vectorisation However the `vioplot` (0.2) function is unable to colour each violin separately, thus this is enabled with a vectorised `col` in `vioplot` (0.3): ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col=c("lightgreen", "lightblue", "palevioletred")) legend("topleft", legend=c("setosa", "versicolor", "virginica"), fill=c("lightgreen", "lightblue", "palevioletred"), cex = 0.5) ``` ## Plot colours: Violin Lines and Boxplot Colours can also be customised for the violin fill and border separately using the `col` and `border` arguments: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue") ``` Similarly, the arguments `lineCol` and `rectCol` specify the colors of the boxplot outline and rectangle fill. For simplicity the box and whiskers of the boxplot will always have the same colour. 
```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", rectCol="palevioletred", lineCol="violetred") ``` The same applies to the colour of the median point with `colMed`: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", colMed="violet") ``` ### Combined customisation These can be customised colours can be combined: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main = "Sepal Length", col="lightblue", border="royalblue", rectCol="palevioletred", lineCol="violetred", colMed="violet") ``` ### Vectorisation These color and shape settings can also be customised separately for each violin: ```{r} vioplot(iris$Sepal.Length[iris$Species=="setosa"], iris$Sepal.Length[iris$Species=="versicolor"], iris$Sepal.Length[iris$Species=="virginica"], names=c("setosa", "versicolor", "virginica"), main="Sepal Length (Equal Area)", areaEqual = T, col=c("lightgreen", "lightblue", "palevioletred"), border=c("darkolivegreen4", "royalblue4", "violetred4"), rectCol=c("forestgreen", "blue", "palevioletred3"), lineCol=c("darkolivegreen", "royalblue", "violetred4"), colMed=c("green", "cyan", "magenta"), pchMed=c(15, 17, 19)) ``` This should be sufficient to customise the violin plot but further examples are given in [the areaEqual vioplot vignette](violin_area.html) including how violin plots are useful for comparing variation when data does not follow the same distribution. This document also compares the violin plot with other established methods to plot data variation. 
vioplot/inst/COPYRIGHT0000755000176200001440000000303614344404314014224 0ustar liggesusersCopyright (c) 2004, Daniel Adler All rights reserved. Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: * Redistributions of source code must retain the above copyright notice, this list of conditions and the following disclaimer. * Redistributions in binary form must reproduce the above copyright notice, this list of conditions and the following disclaimer in the documentation and/or other materials provided with the distribution. * Neither the name of the University of Goettingen nor the names of its contributors may be used to endorse or promote products derived from this software without specific prior written permission. THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. vioplot/inst/CITATION0000755000176200001440000000132314344507126014070 0ustar liggesuserscitHeader("To cite the enhanced vioplot package in publications use:") citEntry(entry = "Manual", title = "vioplot: violin plot", author = personList(as.person("Daniel Adler"),as.person("S. 
Thomas Kelly"), as.person("Tom Elliott"), as.person("Jordan Adamson")), year = "2022", note = "R package version 0.4.0", url = "https://github.com/TomKellyGenetics/vioplot", textVersion = paste("Daniel Adler, S. Thomas Kelly, Tom Elliott, and Jordan Adamson (2022). vioplot: violin plot. R package version 0.4.0", "https://github.com/TomKellyGenetics/vioplot") ) citFooter(paste("Please also acknowledge the original package: \n citation(", "vioplot", ")", sep="\""))