proxy/0000755000175100001440000000000014250310004011436 5ustar hornikusersproxy/NAMESPACE0000755000175100001440000000175013261704567012711 0ustar hornikusersimport("stats") importFrom("utils", "formatUL") export("dist", "simil", "as.dist", "as.simil", "as.matrix", "pr_simil2dist", "pr_dist2simil", "row.dist", "col.dist", "rowSums.dist", "colSums.dist", "rowMeans.dist", "colMeans.dist", "pr_DB") S3method("dim", "dist") S3method("dimnames", "dist") S3method("dimnames<-", "dist") S3method("names", "dist") S3method("names<-", "dist") S3method("subset", "dist") S3method("[[", "dist") S3method("as.matrix", "dist") S3method("as.matrix", "simil") S3method("print","crossdist") S3method("print","pairdist") S3method("print","simil") S3method("summary","pr_DB") S3method("print","summary.pr_DB") ## generic registry stuff S3method("[[", "proxy_registry") S3method("length", "proxy_registry") S3method("print","proxy_registry") S3method("print","registry_field") S3method("print","registry_entry") S3method("summary","proxy_registry") S3method("as.data.frame","proxy_registry") useDynLib("proxy", .registration = TRUE) proxy/man/0000755000175100001440000000000014250212733012223 5ustar hornikusersproxy/man/rowSums.dist.Rd0000755000175100001440000000264710643260444015153 0ustar hornikusers\name{rowSums.dist} \alias{rowSums.dist} \alias{colSums.dist} \alias{rowMeans.dist} \alias{colMeans.dist} \title{Row Sums/Means of Sparse Symmetric Matrices} \description{ Compute the row (column) sums or means for a sparse symmetric (distance) matrix. } \usage{ rowSums.dist(x, na.rm = FALSE) rowMeans.dist(x, na.rm = FALSE, diag = TRUE) colSums.dist(x, na.rm = FALSE) colMeans.dist(x, na.rm = FALSE, diag = TRUE) } \arguments{ \item{x}{an object of class \code{dist}.} \item{na.rm}{logical, should missing values (including \code{NaN}) be omitted from the summation?} \item{diag}{logical, should the diagonal elements be included in the computation?} } \details{ These functions are more efficient than expanding an object of class \code{dist} to matrix and using \code{rowSums} or \code{rowMeans}. \code{colSums} and \code{colMeans} are provided for convenience. However, note that due to symmetry the result is always the same as for \code{rowSums} or \code{rowMeans}. } \value{ A numeric vector of row sums. } \author{Christian Buchta} \seealso{\code{as.matrix}, \code{as.dist}, and \code{rowSums}.} \examples{ ## x <- matrix(runif(10*2),ncol=2) d <- dist(x) rowSums(as.matrix(d)) rowSums.dist(d) # the same rowMeans(as.matrix(d)) rowMeans.dist(d) # the same rowMeans.dist(d, diag = FALSE) # not the same ## NAs d[3] <- NA rowSums.dist(d, na.rm = TRUE) rowMeans.dist(d, na.rm = TRUE) } \keyword{cluster} proxy/man/dist.Rd0000755000175100001440000001514214250213074013462 0ustar hornikusers\name{dist} \alias{dist} \alias{simil} \alias{print.simil} \alias{print.dist} \alias{print.crosssimil} \alias{print.crossdist} \alias{as.matrix.dist} \alias{as.matrix.simil} \alias{pr_simil2dist} \alias{pr_dist2simil} \alias{as.matrix} \alias{as.dist} \alias{as.simil} \alias{row.dist} \alias{col.dist} \title{Matrix Distance/Similarity Computation} \description{ These functions compute and return the auto-distance/similarity matrix between either rows or columns of a matrix/data frame, or a list, as well as the cross-distance matrix between two matrices/data frames/lists. } \usage{ dist(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_similarities = TRUE, auto_convert_data_frames = TRUE) simil(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_distances = TRUE, auto_convert_data_frames = TRUE) pr_dist2simil(x) pr_simil2dist(x) as.dist(x, FUN = NULL) as.simil(x, FUN = NULL) \method{as.matrix}{dist}(x, diag = 0, \dots) \method{as.matrix}{simil}(x, diag = NA, \dots) } \arguments{ \item{x}{For \code{dist} and \code{simil}, a numeric matrix object, a data frame, or a list. A vector will be converted into a column matrix. For \code{as.simil} and \code{as.dist}, an object of class \code{dist} and \code{simil}, respectively, or a numeric matrix. For \code{pr_dist2simil} and \code{pr_simil2dist}, any numeric vector.} \item{y}{\code{NULL}, or a similar object than \code{x}} \item{method}{a function, a registry entry, or a mnemonic string referencing the proximity measure. A list of all available measures can be obtained using \code{\link{pr_DB}} (see examples). The default for \code{dist} is \code{"Euclidean"}, and for \code{simil} \code{"correlation"}.} \item{diag}{logical value indicating whether the diagonal of the distance/similarity matrix should be printed by \code{\link{print.dist}}/\code{\link{print.simil}}. Note that the diagonal values are never stored in \code{dist} objects. In the context of \code{as.matrix} the value to use on the diagonal representing self-proximities. In case of similarities, this defaults to \code{NA} since a priori there are no upper bounds, so the maximum similarity needs to be specified by the user.} \item{upper}{logical value indicating whether the upper triangle of the distance/similarity matrix should be printed by \code{\link{print.dist}}/\code{\link{print.simil}}} \item{pairwise}{logical value indicating whether distances should be computed for the pairs of \code{x} and \code{y} only.} \item{by_rows}{logical indicating whether proximities between rows, or columns should be computed.} \item{convert_similarities, convert_distances}{logical indicating whether distances should be automatically converted into similarities (and the other way round) if needed.} \item{auto_convert_data_frames}{logical indicating whether data frames should be converted to matrices if all variables are numeric, or all are logical, or all are complex.} \item{FUN}{optional function to be used by \code{as.dist} and \code{as.simil}. If \code{NULL}, it is looked up in the method registry. If there is none specified there, \code{FUN} defaults to \code{pr_simil2dist} and \code{pr_dist2simil}, respectively.} \item{\dots}{further arguments passed to the proximity function.} } \details{ The interface is fashioned after \code{\link[stats]{dist}}, but can also compute cross-distances, and allows user extensions by means of registry of all proximity measures (see \code{\link{pr_DB}}). Missing values are allowed but are excluded from all computations involving the rows within which they occur. If some columns are excluded in calculating a Euclidean, Manhattan, Canberra or Minkowski distance, the sum is scaled up proportionally to the number of columns used (compare \code{\link[stats]{dist}} in package \pkg{stats}). Data frames are silently coerced to matrix if all columns are of (same) mode \code{numeric} or \code{logical}. Distance measures can be used with \code{simil}, and similarity measures with \code{dist}. In these cases, the result is transformed accordingly using the specified coercion functions (default: \eqn{\mathrm{pr\_simil2dist}(x) = 1 - \mathrm{abs}(x)}{pr_simil2dist(x) = 1 - abs(x)} and \eqn{\mathrm{pr\_dist2simil}(x) = 1 / (1 + x)}{pr_dist2simil(x) = 1 / (1 + x)}). Objects of class \code{simil} and \code{dist} can be converted one in another using \code{as.dist} and \code{as.simil}, respectively. Distance and similarity objects can conveniently be subset (see examples). Note that duplicate indexes are silently ignored. } \value{ Auto distances/similarities are returned as an object of class \code{dist}/\code{simil} and cross-distances/similarities as an object of class \code{crossdist}/\code{crosssimil}. } \references{ Anderberg, M.R. (1973), \emph{Cluster analysis for applications}, 359 pp., Academic Press, New York, NY, USA. Cox, M.F. and Cox, M.A.A. (2001), \emph{Multidimensional Scaling}, Chapman and Hall. Sokol, R.S. and Sneath P.H.A (1963), \emph{Principles of Numerical Taxonomy}, W. H. Freeman and Co., San Francisco. } \author{David Meyer \email{David.Meyer@R-project.org} and Christian Buchta \email{Christian.Buchta@wu-wien.ac.at}} \seealso{\code{\link[stats]{dist}} for compatibility information, and \code{\link{pr_DB}} for the proximity data base.} \examples{ ### show available proximities summary(pr_DB) ### get more information about a particular one pr_DB$get_entry("Jaccard") ### binary data x <- matrix(sample(c(FALSE, TRUE), 8, rep = TRUE), ncol = 2) dist(x, method = "Jaccard") ### for real-valued data dist(x, method = "eJaccard") ### for positive real-valued data dist(x, method = "fJaccard") ### cross distances dist(x, x, method = "Jaccard") ### pairwise (diagonal) dist(x, x, method = "Jaccard", pairwise = TRUE) ### this is the same but less efficient as.matrix(stats::dist(x, method = "binary")) ### numeric data x <- matrix(rnorm(16), ncol = 4) ## test inheritance of names rownames(x) <- LETTERS[1:4] colnames(x) <- letters[1:4] dist(x) dist(x, x) ## custom distance function f <- function(x, y) sum(x * y) dist(x, f) ## working with lists z <- unlist(apply(x, 1, list), recursive = FALSE) (d <- dist(z)) dist(z, z) ## subsetting d[[1:2]] subset(d, c(1,3,4)) d[[c(1,2,2)]] # duplicate index gets ignored ## transformations and self-proximities as.matrix(as.simil(d, function(x) exp(-x)), diag = 1) ## row and column indexes row.dist(d) col.dist(d) } \keyword{cluster} proxy/man/registry.Rd0000644000175100001440000002157314246405761014404 0ustar hornikusers\name{pr_DB} \alias{pr_DB} \alias{registry} \alias{summary.pr_DB} %- Also NEED an '\alias' for EACH other topic documented here. \title{Registry of proximities} \description{ Registry containing similarities and distances. } \usage{ pr_DB \special{pr_DB$get_field(name)} \special{pr_DB$get_fields()} \special{pr_DB$get_field_names()} \special{pr_DB$set_field(name, default = NA, type = NA, is_mandatory = FALSE, is_modifiable = TRUE, validity_FUN = NULL)} \special{pr_DB$entry_exists(name)} \special{pr_DB$get_entry(name)} \special{pr_DB$get_entries(name = NULL, pattern = NULL)} \special{pr_DB$get_entry_names(name)} \special{pr_DB$set_entry(...)} \special{pr_DB$modify_entry(...)} \special{pr_DB$delete_entry(name)} \method{summary}{pr_DB}(object, verbosity = c("short", "long"), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{name}{character string representing the name of an entry (case-insensitive).} \item{pattern}{regular expression to be matched to all fields of class \code{"character"} in all entries.} \item{default}{optional default value for the field.} \item{type}{optional character string specifying the class to be required for this field. If \code{type} is a character vector with more than two elements, the entries will be used as fixed set of alternatives. If \code{type} is not a character string or vector, the class will be inferred from the argument given.} \item{is_mandatory}{logical specifying whether new entries are required to have a value for this field.} \item{is_modifiable}{logical specifying whether entries can be changed with respect to that field.} \item{validity_FUN}{optional function or character string with the name of a function that checks the validity of a field entry. Such a function gets the value to be investigated as argument, and should stop with an error message if the value is not correct.} \item{object}{a registry object.} \item{verbosity}{controlling the verbosity of the output of the summary method for the registry. \code{"short"} gives just a list, \code{"long"} also gives the formulas.} \item{\dots}{for \code{pr_DB$set_entry} and \code{pr_DB$modify_entry}: named list of fields to be modified in or added to the registry (see details). This must include the index field (\code{"names"}).} } \details{ \code{pr_DB} represents the registry of all proximity measures available. For each measure, it comprises meta-information that can be queried and extended. Also, new measures can be added. This is done using the following accessor functions of the \code{pr_DB} object: \code{get_field_names()} returns a character vector with all field names. \code{get_field()} returns the information for a specific field as a list with components named as described above. \code{get_fields()} returns a list with all field entries. \code{set_field()} is used to create new fields in the repository (the default value will be set in all entries). \code{get_entry_names()} returns a character vector with (the first alias of) all entries. \code{entry_exists()} is a predicate checking if an entry with the specified alias exists in the registry. \code{get_entry()} returns the specified entry if it exists (and, by default, gives an error if it does not). \code{get_entries()} is used to query more than one entry: either those matching \code{name} exactly, or those where the regular expression in \code{pattern} matches \emph{any} character field in an entry. By default, all values are returned. \code{delete_entry} removes an existing entry from the registry (note that only user-provided entries can be deleted). \code{set_entry} and \code{modify_entry} require a named list of arguments used as field entries. At least the \code{names} index field is required. \code{set_entry} will check for all other mandatory fields. If specified in the field meta data, each field entry and the entry as a whole is checked for validity. Note that only user-specified fields and/or entries can be modified, the data shipped with the package are read-only. The registry fields currently available are as follows: \describe{ \item{FUN}{Function to register (see below).} \item{names}{Character vector with an alias(es) for the measure.} \item{PREFUN}{Optional function (or function name) for preprocessing code (see below).} \item{POSTFUN}{Optional function (or function name) for postprocessing code (see below).} \item{distance}{logical indicating whether this measure is a distance (\code{TRUE}) or similarity (\code{FALSE}).} \item{convert}{Optional Function or function name for converting between similarities and distances when needed.} \item{type}{Optional, the scale the measure applies to (\code{"metric"}, \code{"ordinal"}, \code{"nominal"}, \code{"binary"}, or \code{"other"}). If \code{NULL}, it is assumed to apply to some other unknown scale.} \item{loop}{logical indicating whether \code{FUN} is just a measure, and therefore, if \code{\link{dist}} shall do the loop over all pairs of observations/variables, or if \code{FUN} does the loop on its own.} \item{\code{C_FUN}}{logical indicating whether \code{FUN} is a C function.} \item{abcd}{logical; if \code{TRUE} and binary data (or data to be interpreted as such) are supplied, the number of concordant and discordant pairs is precomputed for every two binary data vectors and supplied to the measure function.} \item{formula}{Optional character string with the symbolic representation of the formula.} \item{reference}{Optional reference (character).} \item{description}{Optional description (character). Ideally, describes the context in which the measure can be applied.} } A function specified as \code{FUN} parameter has mandatory arguments \code{x} and \code{y} (if \code{abcd} is \code{FALSE}), and \code{a}, \code{b}, \code{c}, \code{d}, \code{n} otherwise. Additionally, it gets all optional parameters specified by the user in the \code{\dots} argument of the \code{\link{dist}} and \code{\link{simil}} functions, possibly changed and/or complemented by the corresponding (optional) \code{PREFUN} function. It must return the (diss-)similarity value computed from the arguments. \code{x} and \code{y} are two vectors from the data matrix (matrices) supplied. If \code{abcd} is \code{FALSE}, it is assumed that binary measures will be used, and the number of all \code{n} concordant and discordant pairs (x_k, y_k) precomputed and supplied instead of \code{x} and \code{y}. \code{a}, \code{b}, \code{c}, and \code{d} are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE) pairs, respectively. A function specified as \code{PREFUN} parameter has mandatory arguments \code{x}, \code{y}, \code{p}, and \code{reg_entry}, with \code{y} and \code{p} possibly being \code{NULL} depending on the task at hand. \code{x} and \code{y} are the data objects, \code{p} is a (possibly empty) list with all specified proximity parameters, and \code{reg_entry} is the registry entry (a named list containing all information specified in \code{reg_add}). The preprocessing function is allowed to change all these information, and if so, is required to return *all* arguments as a named list in the same order. A function specified as \code{POSTFUN} parameter has two mandatory arguments: \code{result} and \code{p}. \code{result} will contain the computed raw data, i.e. a vector of length \eqn{n * (n - 1) / 2} for auto-distances (see \code{\link[stats]{dist}} for details on \code{dist} objects), or a matrix for cross-distances. \code{p} contains the specified proximity parameters. Post-processing functions need to return the \code{result} object (even if unmodified). A function specified as \code{convert} parameter should preserve the type of its argument. } \author{David Meyer \email{David.Meyer@R-project.org}} \seealso{\code{\link{dist}}} \examples{ ## create a new distance measure mydist <- function(x,y) x * y ## create a new entry in the registry with two aliases pr_DB$set_entry(FUN = mydist, names = c("test", "mydist")) ## look it up (index is case insensitive): pr_DB$get_entry("TEST") ## modify the content of the description field in the new entry pr_DB$modify_entry(names = "test", description = "foo function") ## create a new field pr_DB$set_field("New") ## look up the test entry again (two ways) pr_DB$get_entry("test") pr_DB[["test"]] ## show total number of entries length(pr_DB) ## show all entries (short list) pr_DB$get_entries(pattern = "foo") ## show more details summary(pr_DB, "long") ## get all entries in a list (and extract first two ones) pr_DB$get_entries()[1:2] ## get all entries as a data frame (select first 3 fields) as.data.frame(pr_DB)[,1:3] ## delete test entry pr_DB$delete_entry("test") ## check if it is really gone pr_DB$entry_exists("test") } \keyword{cluster}% __ONLY ONE__ keyword per line proxy/TODO0000755000175100001440000000037210702641354012150 0ustar hornikusers Possible future work o integrate computation on sparse matrices. o provide cutoff values in proximity computation. o provide computation of centroids. Possible problems o as.simil and as.dist change the objects class before calling FUN. proxy/DESCRIPTION0000644000175100001440000000150414250310004013144 0ustar hornikusersPackage: proxy Type: Package Title: Distance and Similarity Measures Version: 0.4-27 Authors@R: c(person(given = "David", family = "Meyer", role = c("aut", "cre"), email = "David.Meyer@R-project.org"), person(given = "Christian", family = "Buchta", role = "aut")) Description: Provides an extensible framework for the efficient calculation of auto- and cross-proximities, along with implementations of the most popular ones. Depends: R (>= 3.4.0) Imports: stats, utils Suggests: cba Collate: registry.R database.R dist.R similarities.R dissimilarities.R util.R seal.R License: GPL-2 NeedsCompilation: yes Packaged: 2022-06-08 21:36:02 UTC; meyer Author: David Meyer [aut, cre], Christian Buchta [aut] Maintainer: David Meyer Repository: CRAN Date/Publication: 2022-06-09 06:15:32 UTC proxy/build/0000755000175100001440000000000014250213102012536 5ustar hornikusersproxy/build/vignette.rds0000644000175100001440000000040214250213102015071 0ustar hornikusersuQn0 u-I@~`|E.aW Lk de -HHqg3؆ 9ށĒ'}@s lL^uk7:Y#/VnH'?aFfnҋWR֖ i@ WPA|\c_X8JD*ށ ;P9Ƕxîf9ц*%\zsv̗S6͑?(.AZ~)proxy/tests/0000755000175100001440000000000013651074225012620 5ustar hornikusersproxy/tests/distance.Rout.save0000755000175100001440000002627713175160705016243 0ustar hornikusers R version 3.4.2 (2017-09-28) -- "Short Summer" Copyright (C) 2017 The R Foundation for Statistical Computing Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ### ceeboo 2007 > > ## todo: special values NaN, NA, and Inf > > library("proxy") Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20070630) > > ## > > x <- matrix(runif(20),5,4) > inherits(x, "matrix") [1] TRUE > rownames(x) <- LETTERS[1:5] > y <- x > y[2,] <- x[1,] <- 0 > > x [,1] [,2] [,3] [,4] A 0.0000000 0.000000000 0.0000000 0.0000000 B 0.8390691 0.002929016 0.3449861 0.4751930 C 0.0377036 0.300615279 0.1399626 0.5143060 D 0.7621676 0.963274350 0.4358104 0.7934745 E 0.1211588 0.452265898 0.8005272 0.3062947 > y [,1] [,2] [,3] [,4] A 0.4595829 0.2462831 0.6662151 0.2658000 B 0.0000000 0.0000000 0.0000000 0.0000000 C 0.0377036 0.3006153 0.1399626 0.5143060 D 0.7621676 0.9632744 0.4358104 0.7934745 E 0.1211588 0.4522659 0.8005272 0.3062947 > > ## user interfaces > > r <- .Call("R_minkowski_dist", x, NULL, FALSE, 1, PACKAGE = "proxy") > all.equal(c(r), c(stats::dist(x, method = "minkowski", p = 1))) [1] TRUE > r A B C D B 1.6621772 C 0.9925875 1.3431883 D 2.9547267 1.4463526 1.9621392 E 1.6802466 1.7916865 1.1036817 2.0039138 > .Call("R_minkowski_dist", x, x, FALSE, 1, PACKAGE = "proxy") A B C D E A 0.0000000 1.662177 0.9925875 2.954727 1.680247 B 1.6621772 0.000000 1.3431883 1.446353 1.791686 C 0.9925875 1.343188 0.0000000 1.962139 1.103682 D 2.9547267 1.446353 1.9621392 0.000000 2.003914 E 1.6802466 1.791686 1.1036817 2.003914 0.000000 > .Call("R_minkowski_dist", x, y, FALSE, 1, PACKAGE = "proxy") A B C D E A 1.6378810 0.0000000 0.9925875 2.954727 1.680247 B 1.1534623 1.6621772 1.3431883 1.446353 1.791686 C 1.2509699 0.9925875 0.0000000 1.962139 1.103682 D 1.7776551 2.9547267 1.9621392 0.000000 2.003914 E 0.7192137 1.6802466 1.1036817 2.003914 0.000000 > .Call("R_minkowski_dist", x, y, TRUE, 1, PACKAGE = "proxy") [1] 1.637881 1.662177 0.000000 0.000000 0.000000 > > dfun <- paste("R",c("euclidean", "maximum", "manhattan", "canberra", "binary", "matching", "fuzzy", "mutual"),"dist", sep = "_") > > for (f in dfun) { + cat("\nTesting ",f,"\n\n",sep="") + r <- try(do.call(".Call", list(f, x, NULL, FALSE, PACKAGE = "proxy"))) + if ( inherits(r, "try-error")) + next + s <- try(stats::dist(x, method = gsub("R_|_dist", "", f))) + if (!inherits(s, "try-error")) + print(all.equal(c(r), c(s))) + print(r) + print(do.call(".Call", list(f, x, x, FALSE, PACKAGE = "proxy"))) + print(do.call(".Call", list(f, x, y, FALSE, PACKAGE = "proxy"))) + print(do.call(".Call", list(f, x, y, TRUE, PACKAGE = "proxy"))) + } Testing R_euclidean_dist [1] TRUE A B C D B 1.0241432 C 0.6130998 0.8799820 D 1.5258864 1.0186899 1.0627446 E 0.9766699 0.9763929 0.7138466 1.0209722 A B C D E A 0.0000000 1.0241432 0.6130998 1.525886 0.9766699 B 1.0241432 0.0000000 0.8799820 1.018690 0.9763929 C 0.6130998 0.8799820 0.0000000 1.062745 0.7138466 D 1.5258864 1.0186899 1.0627446 0.000000 1.0209722 E 0.9766699 0.9763929 0.7138466 1.020972 0.0000000 A B C D E A 0.8867716 0.0000000 0.6130998 1.525886 0.9766699 B 0.5918315 1.0241432 0.8799820 1.018690 0.9763929 C 0.7208543 0.6130998 0.0000000 1.062745 0.7138466 D 0.9680706 1.5258864 1.0627446 0.000000 1.0209722 E 0.4202848 0.9766699 0.7138466 1.020972 0.0000000 [1] 0.8867716 1.0241432 0.0000000 0.0000000 0.0000000 Testing R_maximum_dist [1] TRUE A B C D B 0.8390691 C 0.5143060 0.8013655 D 0.9632744 0.9603453 0.7244640 E 0.8005272 0.7179103 0.6605646 0.6410087 A B C D E A 0.0000000 0.8390691 0.5143060 0.9632744 0.8005272 B 0.8390691 0.0000000 0.8013655 0.9603453 0.7179103 C 0.5143060 0.8013655 0.0000000 0.7244640 0.6605646 D 0.9632744 0.9603453 0.7244640 0.0000000 0.6410087 E 0.8005272 0.7179103 0.6605646 0.6410087 0.0000000 A B C D E A 0.6662151 0.0000000 0.5143060 0.9632744 0.8005272 B 0.3794862 0.8390691 0.8013655 0.9603453 0.7179103 C 0.5262524 0.5143060 0.0000000 0.7244640 0.6605646 D 0.7169912 0.9632744 0.7244640 0.0000000 0.6410087 E 0.3384240 0.8005272 0.6605646 0.6410087 0.0000000 [1] 0.6662151 0.8390691 0.0000000 0.0000000 0.0000000 Testing R_manhattan_dist [1] TRUE A B C D B 1.6621772 C 0.9925875 1.3431883 D 2.9547267 1.4463526 1.9621392 E 1.6802466 1.7916865 1.1036817 2.0039138 A B C D E A 0.0000000 1.662177 0.9925875 2.954727 1.680247 B 1.6621772 0.000000 1.3431883 1.446353 1.791686 C 0.9925875 1.343188 0.0000000 1.962139 1.103682 D 2.9547267 1.446353 1.9621392 0.000000 2.003914 E 1.6802466 1.791686 1.1036817 2.003914 0.000000 A B C D E A 1.6378810 0.0000000 0.9925875 2.954727 1.680247 B 1.1534623 1.6621772 1.3431883 1.446353 1.791686 C 1.2509699 0.9925875 0.0000000 1.962139 1.103682 D 1.7776551 2.9547267 1.9621392 0.000000 2.003914 E 0.7192137 1.6802466 1.1036817 2.003914 0.000000 [1] 1.637881 1.662177 0.000000 0.000000 0.000000 Testing R_canberra_dist [1] TRUE A B C D B 4.000000 C 4.000000 2.356997 D 4.000000 1.409165 2.157322 E 4.000000 2.348575 1.682606 1.824656 A B C D E A NA 4.000000 4.000000 4.000000 4.000000 B 4 0.000000 2.356997 1.409165 2.348575 C 4 2.356997 0.000000 2.157322 1.682606 D 4 1.409165 2.157322 0.000000 1.824656 E 4 2.348575 1.682606 1.824656 0.000000 A B C D E A 4.000000 NA 4.000000 4.000000 4.000000 B 1.868964 4 2.356997 1.409165 2.348575 C 1.919038 4 0.000000 2.157322 1.682606 D 1.547657 4 2.157322 0.000000 1.824656 E 1.039972 4 1.682606 1.824656 0.000000 [1] 4 4 0 0 0 Testing R_binary_dist [1] TRUE A B C D B 1 C 1 0 D 1 0 0 E 1 0 0 0 A B C D E A 0 1 1 1 1 B 1 0 0 0 0 C 1 0 0 0 0 D 1 0 0 0 0 E 1 0 0 0 0 A B C D E A 1 0 1 1 1 B 0 1 0 0 0 C 0 1 0 0 0 D 0 1 0 0 0 E 0 1 0 0 0 [1] 1 1 0 0 0 Testing R_matching_dist Error in .Call("R_matching_dist", structure(c(0, 0.839069103589281, 0.037703595822677, : "R_matching_dist" not available for .Call() for package "proxy" Testing R_fuzzy_dist Error in stats::dist(x, method = gsub("R_|_dist", "", f)) : invalid distance method A B C D B 1.0000000 C 1.0000000 0.6719380 D 1.0000000 0.4770877 0.6640679 E 1.0000000 0.6979540 0.5844973 0.6036897 A B C D E A 0 1.0000000 1.0000000 1.0000000 1.0000000 B 1 0.0000000 0.6719380 0.4770877 0.6979540 C 1 0.6719380 0.0000000 0.6640679 0.5844973 D 1 0.4770877 0.6640679 0.0000000 0.6036897 E 1 0.6979540 0.5844973 0.6036897 0.0000000 A B C D E A 1.0000000 0 1.0000000 1.0000000 1.0000000 B 0.5180002 1 0.6719380 0.4770877 0.6979540 C 0.6445909 1 0.0000000 0.6640679 0.5844973 D 0.5581104 1 0.6640679 0.0000000 0.6036897 E 0.3562808 1 0.5844973 0.6036897 0.0000000 [1] 1 1 0 0 0 Testing R_mutual_dist Error in .Call("R_mutual_dist", structure(c(0, 0.839069103589281, 0.037703595822677, : "R_mutual_dist" not available for .Call() for package "proxy" > > ## no longer optimized > > .Call("R_ejaccard", x, NULL, FALSE, PACKAGE = "proxy") A B C D B 0.0000000 C 0.0000000 0.2957500 D 0.0000000 0.5298998 0.4107824 E 0.0000000 0.3549987 0.4459170 0.5179277 > .Call("R_ejaccard", x, x, FALSE, PACKAGE = "proxy") A B C D E A 1 0.0000000 0.0000000 0.0000000 0.0000000 B 0 1.0000000 0.2957500 0.5298998 0.3549987 C 0 0.2957500 1.0000000 0.4107824 0.4459170 D 0 0.5298998 0.4107824 1.0000000 0.5179277 E 0 0.3549987 0.4459170 0.5179277 1.0000000 > .Call("R_ejaccard", x, y, FALSE, PACKAGE = "proxy") A B C D E A 0.0000000 1 0.0000000 0.0000000 0.0000000 B 0.6794648 0 0.2957500 0.5298998 0.3549987 C 0.3820854 0 1.0000000 0.4107824 0.4459170 D 0.5374164 0 0.4107824 1.0000000 0.5179277 E 0.8157020 0 0.4459170 0.5179277 1.0000000 > .Call("R_ejaccard", x, y, TRUE, PACKAGE = "proxy") [1] 0 0 1 1 1 > > .Call("R_cosine", x, NULL, FALSE, PACKAGE = "proxy") A B C D B 0.0000000 C 0.0000000 0.5179092 D 0.0000000 0.7485219 0.8416675 E 0.0000000 0.5245741 0.6848730 0.7514756 > .Call("R_cosine", x, x, FALSE, PACKAGE = "proxy") A B C D E A 1 0.0000000 0.0000000 0.0000000 0.0000000 B 0 1.0000000 0.5179092 0.7485219 0.5245741 C 0 0.5179092 1.0000000 0.8416675 0.6848730 D 0 0.7485219 0.8416675 1.0000000 0.7514756 E 0 0.5245741 0.6848730 0.7514756 1.0000000 > .Call("R_cosine", x, y, FALSE, PACKAGE = "proxy") A B C D E A 0.0000000 1 0.0000000 0.0000000 0.0000000 B 0.8175510 0 0.5179092 0.7485219 0.5245741 C 0.5909955 0 1.0000000 0.8416675 0.6848730 D 0.8046383 0 0.8416675 1.0000000 0.7514756 E 0.9026897 0 0.6848730 0.7514756 1.0000000 > .Call("R_cosine", x, y, TRUE, PACKAGE = "proxy") [1] 0 0 1 1 1 > > x <- matrix(x > 0.5, 5,4) > y <- matrix(y > 0.5, 5,4) > > .Call("R_bjaccard", x, NULL, FALSE, PACKAGE = "proxy") 1 2 3 4 2 0.0000000 3 0.0000000 0.0000000 4 0.0000000 0.3333333 0.3333333 5 0.0000000 0.0000000 0.0000000 0.0000000 > .Call("R_bjaccard", x, x, FALSE, PACKAGE = "proxy") [,1] [,2] [,3] [,4] [,5] [1,] 1 0.0000000 0.0000000 0.0000000 0 [2,] 0 1.0000000 0.0000000 0.3333333 0 [3,] 0 0.0000000 1.0000000 0.3333333 0 [4,] 0 0.3333333 0.3333333 1.0000000 0 [5,] 0 0.0000000 0.0000000 0.0000000 1 > .Call("R_bjaccard", x, y, FALSE, PACKAGE = "proxy") [,1] [,2] [,3] [,4] [,5] [1,] 0 1 0.0000000 0.0000000 0 [2,] 0 0 0.0000000 0.3333333 0 [3,] 0 0 1.0000000 0.3333333 0 [4,] 0 0 0.3333333 1.0000000 0 [5,] 1 0 0.0000000 0.0000000 1 > .Call("R_bjaccard", x, y, TRUE, PACKAGE = "proxy") [1] 0 0 1 1 1 > > ### > > proc.time() user system elapsed 0.288 0.040 0.325 proxy/tests/util.Rout.save0000755000175100001440000000577513037764465015440 0ustar hornikusers R version 3.1.2 (2014-10-31) -- "Pumpkin Helmet" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ## test C interfaces > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20070630) > > x <- as.dist(matrix(runif(25),5,5)) > x 1 2 3 4 2 0.8390691 3 0.0377036 0.3006153 4 0.7621676 0.9632744 0.4358104 5 0.1211588 0.4522659 0.8005272 0.3062947 > attributes(x) $Size [1] 5 $call as.dist.default(m = x) $class [1] "dist" $Diag [1] FALSE $Upper [1] FALSE > > z <- .Call(proxy:::R_subset_dist, x, 3) > z dist(0) > > unclass(z) numeric(0) attr(,"Size") [1] 1 attr(,"call") as.dist.default(m = x) attr(,"Diag") [1] FALSE attr(,"Upper") [1] FALSE > > .Call(proxy:::R_subset_dist, x, c(1,3,5)) 1 2 2 0.0377036 3 0.1211588 0.8005272 > > attr(x, "Labels") <- LETTERS[1:5] > > z <- .Call(proxy:::R_subset_dist, x, c("A","C","E")) > z A C C 0.0377036 E 0.1211588 0.8005272 > attributes(z) $Size [1] 3 $call as.dist.default(m = x) $class [1] "dist" $Diag [1] FALSE $Upper [1] FALSE $Labels [1] "A" "C" "E" > > attr(x, "Labels") <- NULL > > .Call(proxy:::R_rowSums_dist, x, FALSE) [1] 1.760099 2.555225 1.574656 2.467547 1.680247 > .Call(proxy:::R_rowSums_dist, z, FALSE) A C E 0.1588624 0.8382308 0.9216860 > > .Call(proxy:::R_row_dist, x, FALSE) # row() [1] 2 3 4 5 3 4 5 4 5 5 > .Call(proxy:::R_row_dist, x, TRUE) # col() [1] 1 1 1 1 2 2 2 3 3 4 > > ## test R interfaces > > dim(x) [1] 5 5 > dimnames(x) <- letters[1:5] > dimnames(x) [1] "a" "b" "c" "d" "e" > names(x) <- LETTERS[1:5] > names(x) [1] "A" "B" "C" "D" "E" > > row.dist(x) [1] 2 3 4 5 3 4 5 4 5 5 > col.dist(x) [1] 1 1 1 1 2 2 2 3 3 4 > > subset(x, c(1,3,5)) A C C 0.0377036 E 0.1211588 0.8005272 > x[[c(1,3,5)]] A C C 0.0377036 E 0.1211588 0.8005272 > x[c(1,3,5)] # as usual [1] 0.8390691 0.7621676 0.3006153 > > x[[-1]] # drop subscripts B C D C 0.3006153 D 0.9632744 0.4358104 E 0.4522659 0.8005272 0.3062947 > > x[[1]] # empty dist(0) > > ### > > proc.time() user system elapsed 0.193 0.030 0.216 proxy/tests/util.R0000755000175100001440000000151213037764465013734 0ustar hornikusers ## test C interfaces library(proxy) set.seed(20070630) x <- as.dist(matrix(runif(25),5,5)) x attributes(x) z <- .Call(proxy:::R_subset_dist, x, 3) z unclass(z) .Call(proxy:::R_subset_dist, x, c(1,3,5)) attr(x, "Labels") <- LETTERS[1:5] z <- .Call(proxy:::R_subset_dist, x, c("A","C","E")) z attributes(z) attr(x, "Labels") <- NULL .Call(proxy:::R_rowSums_dist, x, FALSE) .Call(proxy:::R_rowSums_dist, z, FALSE) .Call(proxy:::R_row_dist, x, FALSE) # row() .Call(proxy:::R_row_dist, x, TRUE) # col() ## test R interfaces dim(x) dimnames(x) <- letters[1:5] dimnames(x) names(x) <- LETTERS[1:5] names(x) row.dist(x) col.dist(x) subset(x, c(1,3,5)) x[[c(1,3,5)]] x[c(1,3,5)] # as usual x[[-1]] # drop subscripts x[[1]] # empty ### proxy/tests/distcalls.Rout.save0000644000175100001440000001031514246412662016414 0ustar hornikusers R version 4.2.0 (2022-04-22) -- "Vigorous Calisthenics" Copyright (C) 2022 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ################################# > ## Test for dist calls > ################################## > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20140107) > > ## get all measures > proxies = pr_DB$get_entry_names() > > ## remove special cases > proxies = setdiff(proxies, c("Mahalanobis", "Minkowski", "Stiles", "Levenshtein", "fJaccard")) > > ## create test data > x = matrix(1:100, 10) > > ## test function: checks if dist(x) == dist(x,x) for all measures, > ## and if diag(dist(x, x)) == diag(x, x, pairwise = TRUE) > prtest <- function(...) { + CD <- dist(x, x, ...) + all(as.matrix(dist(x, ...)) == CD) && + all(diag(CD) == dist(x, x, pairwise = TRUE, ...)) + } > > ## loop over all measures (except special cases) > for (i in proxies) + {cat(i); prtest(i); cat(": OK.\n")} Jaccard: OK. Kulczynski1: OK. Kulczynski2: OK. Mountford: OK. Fager: OK. Russel: OK. simple matching: OK. Hamman: OK. Faith: OK. Tanimoto: OK. Dice: OK. Phi: OK. Michael: OK. Mozley: OK. Yule: OK. Yule2: OK. Ochiai: OK. Simpson: OK. Braun-Blanquet: OK. cosine: OK. angular: OK. eJaccard: OK. eDice: OK. correlation: OK. Chi-squared: OK. Phi-squared: OK. Tschuprow: OK. Cramer: OK. Pearson: OK. Gower: OK. Euclidean: OK. Bhjattacharyya: OK. Manhattan: OK. supremum: OK. Canberra: OK. Wave: OK. divergence: OK. Kullback: OK. Bray: OK. Soergel: OK. Podani: OK. Chord: OK. Geodesic: OK. Whittaker: OK. Hellinger: OK. > > ## Minkowski > for (j in c(0.5, 1, 2, 3, Inf)) + {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} Minkowski: p = 0.5: OK. Minkowski: p = 1: OK. Minkowski: p = 2: OK. Minkowski: p = 3: OK. Minkowski: p = Inf: OK. > > ## Mahalanobis (need non-singular matrix) > x = as.matrix(iris[1:50,-5]) > prtest("Mahalanobis") [1] TRUE > > ## fJaccard (needs values in unit interval) > x = as.matrix(1:100/100, 10) > prtest("fJaccard") [1] TRUE > > ## produce binary matrix > x = matrix(rbinom(100,1,0.7), 10) > > ## Stiles (gives a lot of warnings due to log) > tmp = dist(x, "Stiles") There were 27 warnings (use warnings() to see them) > tmp = dist(x, x, "Stiles") There were 50 or more warnings (use warnings() to see the first 50) > > ## try again (almost) all measures, this time with binary data to check > ## conversions > for (i in proxies) + {cat(i); prtest(i); cat(": OK.\n")} Jaccard: OK. Kulczynski1: OK. Kulczynski2: OK. Mountford: OK. Fager: OK. Russel: OK. simple matching: OK. Hamman: OK. Faith: OK. Tanimoto: OK. Dice: OK. Phi: OK. Michael: OK. Mozley: OK. Yule: OK. Yule2: OK. Ochiai: OK. Simpson: OK. Braun-Blanquet: OK. cosine: OK. angular: OK. eJaccard: OK. eDice: OK. correlation: OK. Chi-squared: OK. Phi-squared: OK. Tschuprow: OK. Cramer: OK. Pearson: OK. Gower: OK. Euclidean: OK. Bhjattacharyya: OK. Manhattan: OK. supremum: OK. Canberra: OK. Wave: OK. divergence: OK. Kullback: OK. Bray: OK. Soergel: OK. Podani: OK. Chord: OK. Geodesic: OK. Whittaker: OK. Hellinger: OK. > ## Minkowski > for (j in c(0.5, 1, 2, 3, Inf)) + {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} Minkowski: p = 0.5: OK. Minkowski: p = 1: OK. Minkowski: p = 2: OK. Minkowski: p = 3: OK. Minkowski: p = Inf: OK. > > ## Levenshtein distance > s <- c("A", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog") > all(as.matrix(dist(s, "Levenshtein")) == dist(s, s, "Levenshtein")) [1] TRUE > > ## Test auto-conversion > x = iris[,-5] > prtest() [1] TRUE > > > > proc.time() user system elapsed 0.585 0.012 0.591 proxy/tests/registry.R0000755000175100001440000000353210643260444014617 0ustar hornikusers########################## ### registry test instances library(proxy) .my_check_fun <- function(x) if (x$Z == 999 && x$New2 == 999) stop("No evil allowed!") ## create registry R <- proxy:::registry(entry_class = "simple.list", validity_FUN = .my_check_fun) R ## set fields R$set_field("X", type = TRUE, is_mandatory = TRUE) R$set_field("Y", type = "character") R$set_field("Z", default = 123) R$get_fields() ## add entries R$set_entry(names = "test", X = TRUE, Y = "bla") R$set_entry(names = "test2", X = FALSE, Y = "foo", Z = 99) R$set_entry(names = "test3", X = FALSE, Y = "bar", Z = "chars") R$get_entry("test") R[["test2"]] R[["test3"]] ## add new field R$set_field("New") R$get_field("New") ## change entries R$modify_entry(names = "test", New = 123) R$modify_entry(names = "test2", New = "test") ## field check function (checks for strict positive values) R$set_field("New2", type = "numeric", validity_FUN = function(x) stopifnot(x > 0)) R$set_entry(names = "test5", X = TRUE, New2 = 2) ## add field with fixed alternatives R$set_field("New3", type = c("A", "B")) R$get_field("New") R$set_entry(names = "test6", X = TRUE, New3 = "A") ## print/summary = as.data.frame R summary(R) ## seal entries R$seal_entries() R$set_field("New4") R$set_entry(names = "test7", X = TRUE, Y = "bla") R$delete_entry("test7") R$modify_entry(names = "test", New4 = "test") ## error cases: TRY <- function(...) stopifnot(inherits(try(..., silent = TRUE), "try-error")) TRY(R$set_field("bla", type = "character", default = 123)) TRY(R$set_entry("err1", Y = "bla")) TRY(R$set_entry("err2", X = "bla")) TRY(R$set_entry("err3", X = TRUE, New2 = -2)) TRY(R$set_entry("err4", X = TRUE, Z = 999, New2 = 999)) TRY(R$set_entry("err5", X = TRUE, New3 = "C")) TRY(R$modify_entry("Bla", "New", 123)) TRY(R$modify_entry("X", "Bla", 123)) TRY(R$modify_entry("test","X",TRUE)) proxy/tests/distance.R0000755000175100001440000000357513175160705014552 0ustar hornikusers ### ceeboo 2007 ## todo: special values NaN, NA, and Inf library("proxy") set.seed(20070630) ## x <- matrix(runif(20),5,4) inherits(x, "matrix") rownames(x) <- LETTERS[1:5] y <- x y[2,] <- x[1,] <- 0 x y ## user interfaces r <- .Call("R_minkowski_dist", x, NULL, FALSE, 1, PACKAGE = "proxy") all.equal(c(r), c(stats::dist(x, method = "minkowski", p = 1))) r .Call("R_minkowski_dist", x, x, FALSE, 1, PACKAGE = "proxy") .Call("R_minkowski_dist", x, y, FALSE, 1, PACKAGE = "proxy") .Call("R_minkowski_dist", x, y, TRUE, 1, PACKAGE = "proxy") dfun <- paste("R",c("euclidean", "maximum", "manhattan", "canberra", "binary", "matching", "fuzzy", "mutual"),"dist", sep = "_") for (f in dfun) { cat("\nTesting ",f,"\n\n",sep="") r <- try(do.call(".Call", list(f, x, NULL, FALSE, PACKAGE = "proxy"))) if ( inherits(r, "try-error")) next s <- try(stats::dist(x, method = gsub("R_|_dist", "", f))) if (!inherits(s, "try-error")) print(all.equal(c(r), c(s))) print(r) print(do.call(".Call", list(f, x, x, FALSE, PACKAGE = "proxy"))) print(do.call(".Call", list(f, x, y, FALSE, PACKAGE = "proxy"))) print(do.call(".Call", list(f, x, y, TRUE, PACKAGE = "proxy"))) } ## no longer optimized .Call("R_ejaccard", x, NULL, FALSE, PACKAGE = "proxy") .Call("R_ejaccard", x, x, FALSE, PACKAGE = "proxy") .Call("R_ejaccard", x, y, FALSE, PACKAGE = "proxy") .Call("R_ejaccard", x, y, TRUE, PACKAGE = "proxy") .Call("R_cosine", x, NULL, FALSE, PACKAGE = "proxy") .Call("R_cosine", x, x, FALSE, PACKAGE = "proxy") .Call("R_cosine", x, y, FALSE, PACKAGE = "proxy") .Call("R_cosine", x, y, TRUE, PACKAGE = "proxy") x <- matrix(x > 0.5, 5,4) y <- matrix(y > 0.5, 5,4) .Call("R_bjaccard", x, NULL, FALSE, PACKAGE = "proxy") .Call("R_bjaccard", x, x, FALSE, PACKAGE = "proxy") .Call("R_bjaccard", x, y, FALSE, PACKAGE = "proxy") .Call("R_bjaccard", x, y, TRUE, PACKAGE = "proxy") ### proxy/tests/registry.Rout.save0000755000175100001440000000757412464233322016314 0ustar hornikusers R version 3.1.2 (2014-10-31) -- "Pumpkin Helmet" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ########################## > ### registry test instances > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > .my_check_fun <- function(x) if (x$Z == 999 && x$New2 == 999) stop("No evil allowed!") > > ## create registry > R <- proxy:::registry(entry_class = "simple.list", + validity_FUN = .my_check_fun) > R An object of class "registry" with no entry. > > ## set fields > R$set_field("X", type = TRUE, is_mandatory = TRUE) > R$set_field("Y", type = "character") > R$set_field("Z", default = 123) > R$get_fields() $names type character default NA is_mandatory TRUE is_modifiable TRUE validity_FUN NULL $X type logical default NA is_mandatory TRUE is_modifiable TRUE validity_FUN NULL $Y type character default NA is_mandatory FALSE is_modifiable TRUE validity_FUN NULL $Z type NA default 123 is_mandatory FALSE is_modifiable TRUE validity_FUN NULL > > ## add entries > R$set_entry(names = "test", X = TRUE, Y = "bla") > R$set_entry(names = "test2", X = FALSE, Y = "foo", Z = 99) > R$set_entry(names = "test3", X = FALSE, Y = "bar", Z = "chars") > R$get_entry("test") _ names test X TRUE Y bla Z 123 > R[["test2"]] _ names test2 X FALSE Y foo Z 99 > R[["test3"]] _ names test3 X FALSE Y bar Z chars > > ## add new field > R$set_field("New") > R$get_field("New") type NA default NA is_mandatory FALSE is_modifiable TRUE validity_FUN NULL > > ## change entries > R$modify_entry(names = "test", New = 123) > R$modify_entry(names = "test2", New = "test") > > ## field check function (checks for strict positive values) > R$set_field("New2", type = "numeric", validity_FUN = function(x) stopifnot(x > 0)) > R$set_entry(names = "test5", X = TRUE, New2 = 2) > > ## add field with fixed alternatives > R$set_field("New3", type = c("A", "B")) > R$get_field("New") type NA default NA is_mandatory FALSE is_modifiable TRUE validity_FUN NULL > R$set_entry(names = "test6", X = TRUE, New3 = "A") > > ## print/summary = as.data.frame > R An object of class "registry" with 5 entries. > summary(R) X Y Z New New2 New3 test TRUE bla 123 123 NA test2 FALSE foo 99 test NA test3 FALSE bar chars NA test5 TRUE 123 2 test6 TRUE 123 NA A > > ## seal entries > R$seal_entries() > R$set_field("New4") > R$set_entry(names = "test7", X = TRUE, Y = "bla") > R$delete_entry("test7") > R$modify_entry(names = "test", New4 = "test") > > ## error cases: > TRY <- function(...) stopifnot(inherits(try(..., silent = TRUE), "try-error")) > TRY(R$set_field("bla", type = "character", default = 123)) > TRY(R$set_entry("err1", Y = "bla")) > TRY(R$set_entry("err2", X = "bla")) > TRY(R$set_entry("err3", X = TRUE, New2 = -2)) > TRY(R$set_entry("err4", X = TRUE, Z = 999, New2 = 999)) > TRY(R$set_entry("err5", X = TRUE, New3 = "C")) > TRY(R$modify_entry("Bla", "New", 123)) > TRY(R$modify_entry("X", "Bla", 123)) > TRY(R$modify_entry("test","X",TRUE)) > > proc.time() user system elapsed 0.226 0.011 0.230 proxy/tests/distcalls.R0000755000175100001440000000322613037764465014745 0ustar hornikusers################################# ## Test for dist calls ################################## library(proxy) set.seed(20140107) ## get all measures proxies = pr_DB$get_entry_names() ## remove special cases proxies = setdiff(proxies, c("Mahalanobis", "Minkowski", "Stiles", "Levenshtein", "fJaccard")) ## create test data x = matrix(1:100, 10) ## test function: checks if dist(x) == dist(x,x) for all measures, ## and if diag(dist(x, x)) == diag(x, x, pairwise = TRUE) prtest <- function(...) { CD <- dist(x, x, ...) all(as.matrix(dist(x, ...)) == CD) && all(diag(CD) == dist(x, x, pairwise = TRUE, ...)) } ## loop over all measures (except special cases) for (i in proxies) {cat(i); prtest(i); cat(": OK.\n")} ## Minkowski for (j in c(0.5, 1, 2, 3, Inf)) {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} ## Mahalanobis (need non-singular matrix) x = as.matrix(iris[1:50,-5]) prtest("Mahalanobis") ## fJaccard (needs values in unit interval) x = as.matrix(1:100/100, 10) prtest("fJaccard") ## produce binary matrix x = matrix(rbinom(100,1,0.7), 10) ## Stiles (gives a lot of warnings due to log) tmp = dist(x, "Stiles") tmp = dist(x, x, "Stiles") ## try again (almost) all measures, this time with binary data to check ## conversions for (i in proxies) {cat(i); prtest(i); cat(": OK.\n")} ## Minkowski for (j in c(0.5, 1, 2, 3, Inf)) {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} ## Levenshtein distance s <- c("A", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog") all(as.matrix(dist(s, "Levenshtein")) == dist(s, s, "Levenshtein")) ## Test auto-conversion x = iris[,-5] prtest() proxy/tests/apply.R0000755000175100001440000000543413651074111014073 0ustar hornikusers ## tests on apply C wrappers library(proxy) set.seed(20070630) ## matrix f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) x <- matrix(runif(20), 5, 4) x y <- matrix(runif(20), 5, 4) y .External(proxy:::R_apply_dist_matrix, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_matrix, x, x, FALSE, f) .External(proxy:::R_apply_dist_matrix, x, y, FALSE, f) .External(proxy:::R_apply_dist_matrix, x, y, TRUE, f) # coerce z <- y * 100 storage.mode(z) <- "integer" .External(proxy:::R_apply_dist_matrix, x, z, FALSE, f) .External(proxy:::R_apply_dist_matrix, z, x, FALSE, f) .External(proxy:::R_apply_dist_matrix, z, z, FALSE, f) .External(proxy:::R_apply_dist_matrix, z, NULL, FALSE, f) ## list x <- unlist(apply(x, 1, list), recursive = FALSE) x y <- unlist(apply(y, 1, list), recursive = FALSE) .External(proxy:::R_apply_dist_list, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_list, x, x, FALSE, f) .External(proxy:::R_apply_dist_list, x, y, FALSE, f) .External(proxy:::R_apply_dist_list, x, y, TRUE, f) ## logical matrix f <- function(a, b, c, d, n) a / sqrt(a+b) / sqrt(a+c) x <- t(sapply(x, ">", 0.5)) x y <- t(sapply(y, ">", 0.5)) .External(proxy:::R_apply_dist_binary_matrix, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_binary_matrix, x, x, FALSE, f) .External(proxy:::R_apply_dist_binary_matrix, x, y, FALSE, f) .External(proxy:::R_apply_dist_binary_matrix, x, y, TRUE, f) ## data.frame f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) x <- data.frame(unlist(apply(x, 2, list), recursive = FALSE)) names(x) <- letters[1:4] x y <- data.frame(unlist(apply(y, 2, list), recursive = FALSE)) names(y) <- letters[1:4] .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y, TRUE, f) # f <- function(x, y) { if (rownames(x) == 1 && rownames(y) == 1) { print(x) str(x) print(y) } sum(x == y) / length(x) } x <- data.frame(1:5, LETTERS[1:5], stringsAsFactors = FALSE) x y <- data.frame(1:6, LETTERS[c(1,1:5)], row.names = letters[1:6], stringsAsFactors = FALSE) y all.equal(x, y) identical(attributes(x[[1]]), attributes(y[[1]])) identical(attributes(x[[2]]), attributes(y[[2]])) .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) # test parameters f <- function(x, y, p = 1) sum(x == y) / length(x) * p .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f, p = 2) ### proxy/tests/apply.Rout.save0000755000175100001440000002633413651075270015571 0ustar hornikusers R version 3.6.3 (2020-02-29) -- "Holding the Windsock" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ## tests on apply C wrappers > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20070630) > > ## matrix > > f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) > > x <- matrix(runif(20), 5, 4) > x [,1] [,2] [,3] [,4] [1,] 0.4595829 0.246283120 0.6662151 0.2658000 [2,] 0.8390691 0.002929016 0.3449861 0.4751930 [3,] 0.0377036 0.300615279 0.1399626 0.5143060 [4,] 0.7621676 0.963274350 0.4358104 0.7934745 [5,] 0.1211588 0.452265898 0.8005272 0.3062947 > y <- matrix(runif(20), 5, 4) > y [,1] [,2] [,3] [,4] [1,] 0.3435311 0.45003283 0.1931588 0.22999466 [2,] 0.4417228 0.78191550 0.9028450 0.01728483 [3,] 0.3872093 0.45154308 0.7324979 0.52502253 [4,] 0.9119128 0.21720445 0.3986998 0.72623146 [5,] 0.9374529 0.04343485 0.8170873 0.69974714 > > .External(proxy:::R_apply_dist_matrix, x, NULL, FALSE, f) 1 2 3 4 2 0.8175510 3 0.5909955 0.5179092 4 0.8046383 0.7485219 0.8416675 5 0.9026897 0.5245741 0.6848730 0.7514756 > .External(proxy:::R_apply_dist_matrix, x, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1.0000000 0.8175510 0.5909955 0.8046383 0.9026897 [2,] 0.8175510 1.0000000 0.5179092 0.7485219 0.5245741 [3,] 0.5909955 0.5179092 1.0000000 0.8416675 0.6848730 [4,] 0.8046383 0.7485219 0.8416675 1.0000000 0.7514756 [5,] 0.9026897 0.5245741 0.6848730 0.7514756 1.0000000 > .External(proxy:::R_apply_dist_matrix, x, y, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8068091 0.8869376 0.9573450 0.8394141 0.9257044 [2,] 0.7091915 0.5310191 0.7490815 0.9743857 0.9583243 [3,] 0.7471004 0.4955903 0.7898068 0.6896626 0.5971429 [4,] 0.9837110 0.7703657 0.8896691 0.8665744 0.7655715 [5,] 0.7512326 0.9126553 0.9466108 0.6140860 0.7184058 > .External(proxy:::R_apply_dist_matrix, x, y, TRUE, f) [1] 0.8068091 0.5310191 0.7898068 0.8665744 0.7184058 > > # coerce > > z <- y * 100 > storage.mode(z) <- "integer" > .External(proxy:::R_apply_dist_matrix, x, z, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8035753 0.8851147 0.9568709 0.8363210 0.9255173 [2,] 0.7027894 0.5281041 0.7457478 0.9754800 0.9584822 [3,] 0.7399077 0.4909526 0.7896421 0.6859131 0.5945390 [4,] 0.9811939 0.7674593 0.8884016 0.8638128 0.7638742 [5,] 0.7490644 0.9109332 0.9482222 0.6083438 0.7173810 > .External(proxy:::R_apply_dist_matrix, z, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8035753 0.7027894 0.7399077 0.9811939 0.7490644 [2,] 0.8851147 0.5281041 0.4909526 0.7674593 0.9109332 [3,] 0.9568709 0.7457478 0.7896421 0.8884016 0.9482222 [4,] 0.8363210 0.9754800 0.6859131 0.8638128 0.6083438 [5,] 0.9255173 0.9584822 0.5945390 0.7638742 0.7173810 > .External(proxy:::R_apply_dist_matrix, z, z, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1.0000000 0.8363694 0.8593771 0.8075186 0.7133480 [2,] 0.8363694 1.0000000 0.8668551 0.5849013 0.6553123 [3,] 0.8593771 0.8668551 1.0000000 0.8253313 0.8715788 [4,] 0.8075186 0.5849013 0.8253313 1.0000000 0.9495662 [5,] 0.7133480 0.6553123 0.8715788 0.9495662 1.0000000 > .External(proxy:::R_apply_dist_matrix, z, NULL, FALSE, f) 1 2 3 4 2 0.8363694 3 0.8593771 0.8668551 4 0.8075186 0.5849013 0.8253313 5 0.7133480 0.6553123 0.8715788 0.9495662 > > ## list > > x <- unlist(apply(x, 1, list), recursive = FALSE) > x [[1]] [1] 0.4595829 0.2462831 0.6662151 0.2658000 [[2]] [1] 0.839069104 0.002929016 0.344986119 0.475192965 [[3]] [1] 0.0377036 0.3006153 0.1399626 0.5143060 [[4]] [1] 0.7621676 0.9632744 0.4358104 0.7934745 [[5]] [1] 0.1211588 0.4522659 0.8005272 0.3062947 > y <- unlist(apply(y, 1, list), recursive = FALSE) > > .External(proxy:::R_apply_dist_list, x, NULL, FALSE, f) 1 2 3 4 2 0.8175510 3 0.5909955 0.5179092 4 0.8046383 0.7485219 0.8416675 5 0.9026897 0.5245741 0.6848730 0.7514756 > .External(proxy:::R_apply_dist_list, x, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1.0000000 0.8175510 0.5909955 0.8046383 0.9026897 [2,] 0.8175510 1.0000000 0.5179092 0.7485219 0.5245741 [3,] 0.5909955 0.5179092 1.0000000 0.8416675 0.6848730 [4,] 0.8046383 0.7485219 0.8416675 1.0000000 0.7514756 [5,] 0.9026897 0.5245741 0.6848730 0.7514756 1.0000000 > .External(proxy:::R_apply_dist_list, x, y, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8068091 0.8869376 0.9573450 0.8394141 0.9257044 [2,] 0.7091915 0.5310191 0.7490815 0.9743857 0.9583243 [3,] 0.7471004 0.4955903 0.7898068 0.6896626 0.5971429 [4,] 0.9837110 0.7703657 0.8896691 0.8665744 0.7655715 [5,] 0.7512326 0.9126553 0.9466108 0.6140860 0.7184058 > .External(proxy:::R_apply_dist_list, x, y, TRUE, f) [1] 0.8068091 0.5310191 0.7898068 0.8665744 0.7184058 > > ## logical matrix > > f <- function(a, b, c, d, n) + a / sqrt(a+b) / sqrt(a+c) > > x <- t(sapply(x, ">", 0.5)) > x [,1] [,2] [,3] [,4] [1,] FALSE FALSE TRUE FALSE [2,] TRUE FALSE FALSE FALSE [3,] FALSE FALSE FALSE TRUE [4,] TRUE TRUE FALSE TRUE [5,] FALSE FALSE TRUE FALSE > y <- t(sapply(y, ">", 0.5)) > > .External(proxy:::R_apply_dist_binary_matrix, x, NULL, FALSE, f) 1 2 3 4 2 0.0000000 3 0.0000000 0.0000000 4 0.0000000 0.5773503 0.5773503 5 1.0000000 0.0000000 0.0000000 0.0000000 > .External(proxy:::R_apply_dist_binary_matrix, x, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1 0.0000000 0.0000000 0.0000000 1 [2,] 0 1.0000000 0.0000000 0.5773503 0 [3,] 0 0.0000000 1.0000000 0.5773503 0 [4,] 0 0.5773503 0.5773503 1.0000000 0 [5,] 1 0.0000000 0.0000000 0.0000000 1 > .External(proxy:::R_apply_dist_binary_matrix, x, y, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] NaN 0.7071068 0.7071068 0.0000000 0.5773503 [2,] NaN 0.0000000 0.0000000 0.7071068 0.5773503 [3,] NaN 0.0000000 0.7071068 0.7071068 0.5773503 [4,] NaN 0.4082483 0.4082483 0.8164966 0.6666667 [5,] NaN 0.7071068 0.7071068 0.0000000 0.5773503 > .External(proxy:::R_apply_dist_binary_matrix, x, y, TRUE, f) [1] NaN 0.0000000 0.7071068 0.8164966 0.5773503 > > ## data.frame > > f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) > > x <- data.frame(unlist(apply(x, 2, list), recursive = FALSE)) > names(x) <- letters[1:4] > x a b c d 1 FALSE FALSE TRUE FALSE 2 TRUE FALSE FALSE FALSE 3 FALSE FALSE FALSE TRUE 4 TRUE TRUE FALSE TRUE 5 FALSE FALSE TRUE FALSE > y <- data.frame(unlist(apply(y, 2, list), recursive = FALSE)) > names(y) <- letters[1:4] > > .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) 1 2 3 4 2 0.0000000 3 0.0000000 0.0000000 4 0.0000000 0.5773503 0.5773503 5 1.0000000 0.0000000 0.0000000 0.0000000 > .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) 1 2 3 4 5 1 1 0.0000000 0.0000000 0.0000000 1 2 0 1.0000000 0.0000000 0.5773503 0 3 0 0.0000000 1.0000000 0.5773503 0 4 0 0.5773503 0.5773503 1.0000000 0 5 1 0.0000000 0.0000000 0.0000000 1 > .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) 1 2 3 4 5 1 NaN 0.7071068 0.7071068 0.0000000 0.5773503 2 NaN 0.0000000 0.0000000 0.7071068 0.5773503 3 NaN 0.0000000 0.7071068 0.7071068 0.5773503 4 NaN 0.4082483 0.4082483 0.8164966 0.6666667 5 NaN 0.7071068 0.7071068 0.0000000 0.5773503 > .External(proxy:::R_apply_dist_data_frame, x, y, TRUE, f) [1] NaN 0.0000000 0.7071068 0.8164966 0.5773503 > > # > > f <- function(x, y) { + if (rownames(x) == 1 && rownames(y) == 1) { + print(x) + str(x) + print(y) + } + sum(x == y) / length(x) + } > > x <- data.frame(1:5, LETTERS[1:5], stringsAsFactors = FALSE) > x X1.5 LETTERS.1.5. 1 1 A 2 2 B 3 3 C 4 4 D 5 5 E > > y <- data.frame(1:6, LETTERS[c(1,1:5)], + row.names = letters[1:6], + stringsAsFactors = FALSE) > y X1.6 LETTERS.c.1..1.5.. a 1 A b 2 A c 3 B d 4 C e 5 D f 6 E > > all.equal(x, y) [1] "Names: 2 string mismatches" [2] "Attributes: < Component \"row.names\": Modes: numeric, character >" [3] "Attributes: < Component \"row.names\": Lengths: 5, 6 >" [4] "Attributes: < Component \"row.names\": target is numeric, current is character >" [5] "Component 1: Numeric: lengths (5, 6) differ" [6] "Component 2: Lengths (5, 6) differ (string compare on first 5)" [7] "Component 2: 4 string mismatches" > identical(attributes(x[[1]]), attributes(y[[1]])) [1] TRUE > identical(attributes(x[[2]]), attributes(y[[2]])) [1] TRUE > > .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) 1 2 3 4 2 0 3 0 0 4 0 0 0 5 0 0 0 0 > .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) X1.5 LETTERS.1.5. 1 1 A 'data.frame': 1 obs. of 2 variables: $ X1.5 : int 1 $ LETTERS.1.5.: chr "A" X1.5 LETTERS.1.5. 1 1 A 1 2 3 4 5 1 1 0 0 0 0 2 0 1 0 0 0 3 0 0 1 0 0 4 0 0 0 1 0 5 0 0 0 0 1 > .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) X1.5 LETTERS.1.5. 1 1 A 'data.frame': 1 obs. of 2 variables: $ X1.5 : int 1 $ LETTERS.1.5.: chr "A" X1.5 LETTERS.1.5. 1 1 A a b c d e f 1 1 0.5 0.0 0.0 0.0 0.0 2 0 0.5 0.5 0.0 0.0 0.0 3 0 0.0 0.5 0.5 0.0 0.0 4 0 0.0 0.0 0.5 0.5 0.0 5 0 0.0 0.0 0.0 0.5 0.5 > .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) X1.5 LETTERS.1.5. 1 1 A 'data.frame': 1 obs. of 2 variables: $ X1.5 : int 1 $ LETTERS.1.5.: chr "A" X1.5 LETTERS.1.5. 1 1 A [1] 1.0 0.5 0.5 0.5 0.5 > > > # test parameters > > f <- function(x, y, p = 1) + sum(x == y) / length(x) * p > > .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) [1] 1.0 0.5 0.5 0.5 0.5 > .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f, p = 2) [1] 2 1 1 1 1 > > ### > > proc.time() user system elapsed 0.310 0.019 0.317 proxy/src/0000755000175100001440000000000014250213102012226 5ustar hornikusersproxy/src/util.c0000755000175100001440000001041713223372641013372 0ustar hornikusers#include #include // arrayIndex.c extern SEXP _int_array_subscript(int, SEXP, const char *, const char *, SEXP, Rboolean, SEXP); // subset a dist object. in order to preserve symmetry // we allow only one subset index. // // notes: (1) by definition we return a zero-length vector // for subscripts of length less than two. (2) coercing to // real is convenient but slightly inefficient. // // ceeboo 2006, 2007 SEXP R_subset_dist(SEXP R_x, SEXP s) { if (!inherits(R_x, "dist")) error("'x' not of class dist"); int i, j, k, si, sj, nx, ns; SEXP x = R_x, r, d; nx = 1 + (int) sqrt(2*LENGTH(x)); if (LENGTH(x) != nx*(nx-1)/2) error("'x' invalid length"); if (TYPEOF(x) != REALSXP) PROTECT(x = coerceVector(R_x, REALSXP)); PROTECT(r = allocArray(INTSXP, PROTECT(ScalarInteger(0)))); UNPROTECT(1); INTEGER(getAttrib(r, R_DimSymbol))[0] = nx; d = getAttrib(x, install("Labels")); if (!isNull(d)) { SEXP t; if (TYPEOF(d) != STRSXP) error("'Labels' not of type character"); if (LENGTH(d) != nx) error("'Labels' invalid length"); setAttrib(r, R_DimNamesSymbol, PROTECT(t = allocVector(VECSXP, 1))); UNPROTECT(1); SET_VECTOR_ELT(t, 0, d); } #ifdef _COMPAT_ PROTECT(s = arraySubscript(0, s, GET_DIM(r), getAttrib, (STRING_ELT), r)); #else PROTECT(s = _int_array_subscript(0, s, "dim", "dimnames", r, TRUE, R_NilValue)); #endif ns = LENGTH(s); for (k = 0; k < ns; k++) if (INTEGER(s)[k] == NA_INTEGER) error("'s' invalid subscript(s)"); else INTEGER(s)[k]--; PROTECT(r = allocVector(REALSXP, ns*(ns-1)/2)); k = 0; for (i = 0; i < ns-1; i++) { si = INTEGER(s)[i]; for (j = i+1; j < ns; j++) { sj = INTEGER(s)[j]; if (si == sj) REAL(r)[k++] = NA_REAL; else REAL(r)[k++] = (si > sj) ? REAL(x)[si+sj*(nx-1)-sj*(sj+1)/2-1] : REAL(x)[sj+si*(nx-1)-si*(si+1)/2-1]; } R_CheckUserInterrupt(); } if (x == R_x) copyMostAttrib(R_x, r); setAttrib(r, install("Size"), PROTECT(ScalarInteger(ns))); UNPROTECT(1); if (!isNull(d)) { SEXP t; setAttrib(r, install("Labels"), PROTECT(t = allocVector(STRSXP, ns))); UNPROTECT(1); for (k = 0; k < ns; k++) SET_STRING_ELT(t, k, STRING_ELT(d, INTEGER(s)[k])); } UNPROTECT(3); if (x != R_x) UNPROTECT(1); return r; } // compute the rowSums for an R dist object. due to // symmetry this is equivalent to colSums. rowMeans // are not implemented as these can be easily obtained // from the values of rowSums. // // na_rm implements the usual meaning of omitting NA // and NaN values, where the sum of the empty set is // zero. // // ceeboo 2006, 2007 SEXP R_rowSums_dist(SEXP R_x, SEXP na_rm) { if (!inherits(R_x, "dist")) error("'x' not of class dist"); if (isNull(na_rm) || TYPEOF(na_rm) != LGLSXP) error("'na.rm' not of type logical"); int i, j, k, n; SEXP x = R_x, r; n = 1 + (int) sqrt(2*LENGTH(x)); if (LENGTH(x) != n*(n-1)/2) error("'x' invalid length"); if (TYPEOF(x) != REALSXP) PROTECT(x = coerceVector(R_x, REALSXP)); PROTECT(r = allocVector(REALSXP, n)); memset(REAL(r), 0, sizeof(double)*n); k = 0; for (i = 0; i < n-1; i++) { for (j = i+1; j < n; j++) { double z = REAL(x)[k++]; if (!R_FINITE(z)) { if (ISNAN(z)) { if (LOGICAL(na_rm)[0] == TRUE) continue; REAL(r)[i] = REAL(r)[j] = (ISNA(z)) ? NA_REAL : R_NaN; } else REAL(r)[i] = REAL(r)[j] = z; break; } REAL(r)[i] += z; REAL(r)[j] += z; } R_CheckUserInterrupt(); } setAttrib(r, R_NamesSymbol, getAttrib(x, install("Labels"))); UNPROTECT(1); if (x != R_x) UNPROTECT(1); return r; } // produce row or column indexes SEXP R_row_dist(SEXP x, SEXP col) { if (!inherits(x, "dist")) error("'x' not of class dist"); if (isNull(col) || TYPEOF(col) != LGLSXP) error("'col' not of type logical"); int i, j, n, nx; SEXP r; nx = 1 + (int) sqrt(2*LENGTH(x)); if (LENGTH(x) != nx*(nx-1)/2) error("'x' invalid length"); PROTECT(r = allocVector(INTSXP, LENGTH(x))); n = 0; for (j = 1; j < nx; j++) for (i = j+1; i < nx+1; i++) INTEGER(r)[n++] = (*LOGICAL(col)) ? j : i; UNPROTECT(1); return r; } // proxy/src/apply.c0000755000175100001440000003434213437262301013543 0ustar hornikusers#include #include // wrapper functions for distance computation with // user-supplied functions given common data types, // such as matrix, list, and data.frames. // // note that the code is prone to breaking with new // releases of R. therefore, always check against the // reference examples. /* compute auto- or cross-distances with a user-supplied * function given matrix data. * * ceeboo 2006, 2007 */ SEXP R_apply_dist_matrix(SEXP p) { int i, j, k, l, n, nx, ny, nz, m = 0; SEXP r, c, tx, ty; SEXP R_x, x, R_y, y, R_d, f; p = CDR(p); if (length(p) < 4) error("invalid number of arguments"); R_x = x = CAR(p); R_y = y = CADR(p); if (!isMatrix(x) || (!isNull(y) && !isMatrix(y))) error("invalid data parameter(s)"); p = CDDR(p); R_d = CAR(p); if (TYPEOF(R_d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); if (isNull(y)) y = x; else if (LOGICAL(R_d)[0] == TRUE) m = 2; else m = 1; if ((n = INTEGER(GET_DIM(x))[1]) != INTEGER(GET_DIM(y))[1]) error("the number of columns of the data matrixes do not conform"); nz = nx = INTEGER(GET_DIM(x))[0]; ny = INTEGER(GET_DIM(y))[0]; if (m == 2 && nx != ny) error("the number of rows of the data matrixes do not conform"); if (TYPEOF(x) != REALSXP) { PROTECT(x = coerceVector(R_x, REALSXP)); if (isNull(R_y) || R_x == R_y) y = x; } if (TYPEOF(y) != REALSXP) PROTECT(y = coerceVector(R_y, REALSXP)); if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); if (!isNull(d = getAttrib(x, R_DimNamesSymbol))) setAttrib(r, install("Labels"), VECTOR_ELT(d, 0)); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); d1 = getAttrib(x, R_DimNamesSymbol); d2 = getAttrib(y, R_DimNamesSymbol); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, isNull(d1) ? d1 : VECTOR_ELT(d1, 0)); SET_VECTOR_ELT(d, 1, isNull(d2) ? d2 : VECTOR_ELT(d2, 0)); } } else PROTECT(r = allocVector(REALSXP, nx)); PROTECT(tx = allocVector(REALSXP, n)); PROTECT(ty = allocVector(REALSXP, n)); PROTECT(c = LCONS(f, CONS(tx, CONS(ty, p)))); l = 0; for (j = 0; j < ny; j++) { for (k = 0; k < n; k++) REAL(ty)[k] = REAL(y)[j+k*ny]; if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { for (k = 0; k < n; k++) REAL(tx)[k] = REAL(x)[i+k*nx]; SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(4); if (x != R_x) UNPROTECT(1); if (!isNull(R_y) && R_y != R_x && y != R_y) UNPROTECT(1); return r; } /* compute auto- or cross-distances with a user-supplied * function given list data. * * ceeboo 2006, 2007 */ SEXP R_apply_dist_list(SEXP p) { int i, j, l, nx, ny, nz, m = 0; SEXP r, c, d, tx, ty; SEXP x, y, f; p = CDR(p); if (length(p) < 4) error("invalid number of arguments"); x = CAR(p); y = CADR(p); if (TYPEOF(x) != VECSXP || (!isNull(y) && TYPEOF(y) != VECSXP)) error("invalid data parameter(s)"); p = CDDR(p); d = CAR(p); if (TYPEOF(d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); if (isNull(y)) y = x; else if (LOGICAL(d)[0] == TRUE) m = 2; else m = 1; nz = nx = LENGTH(x); ny = LENGTH(y); if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); PROTECT(d = getAttrib(x, R_NamesSymbol)); if (!isNull(d)) setAttrib(r, install("Labels"), d); UNPROTECT(1); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); PROTECT(d1 = getAttrib(x, R_NamesSymbol)); PROTECT(d2 = getAttrib(y, R_NamesSymbol)); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, d1); SET_VECTOR_ELT(d, 1, d2); } UNPROTECT(2); } else { if (nx != ny) error("the number of components of 'x' and 'y' does not conform"); PROTECT(r = allocVector(REALSXP, nx)); } PROTECT(ty = CONS(R_NilValue, p)); PROTECT(tx = CONS(R_NilValue, ty)); PROTECT(c = LCONS(f, tx)); l = 0; for (j = 0; j < ny; j++) { SETCAR(ty, VECTOR_ELT(y, j)); if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { SETCAR(tx, VECTOR_ELT(x, i)); SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(4); return r; } /* compute binary auto- or cross-distances with a user-supplied * function given logical matrix data, and by precomputing the * number of concordant and discordant pairs. * * dm 2007 */ SEXP R_apply_dist_binary_matrix(SEXP p) { int i, j, k, l, n, nx, ny, nz, m = 0; int i0, j0; SEXP r, c, d, ta, tb, tc, td, tn; SEXP x, y, f; p = CDR(p); if (length(p) < 3) error("invalid number of arguments"); x = CAR(p); y = CADR(p); if (!isMatrix(x) || TYPEOF(x) != LGLSXP || (!isNull(y) && (!isMatrix(y) || TYPEOF(x) != LGLSXP))) error("invalid data parameter(s)"); p = CDDR(p); d = CAR(p); if (TYPEOF(d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); if (isNull(y)) y = x; else if (LOGICAL(d)[0] == TRUE) m = 2; else m = 1; if ((n = INTEGER(GET_DIM(x))[1]) != INTEGER(GET_DIM(y))[1]) error("data parameters do not conform"); nz = nx = INTEGER(GET_DIM(x))[0]; ny = INTEGER(GET_DIM(y))[0]; if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); if (!isNull(d = getAttrib(x, R_DimNamesSymbol))) setAttrib(r, install("Labels"), VECTOR_ELT(d, 0)); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); d1 = getAttrib(x, R_DimNamesSymbol); d2 = getAttrib(y, R_DimNamesSymbol); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, isNull(d1) ? d1 : VECTOR_ELT(d1, 0)); SET_VECTOR_ELT(d, 1, isNull(d2) ? d2 : VECTOR_ELT(d2, 0)); } } else { if (nx != ny) error("the number of rows of 'x' and 'y' does not conform"); PROTECT(r = allocVector(REALSXP, nx)); } PROTECT(ta = allocVector(INTSXP, 1)); PROTECT(tb = allocVector(INTSXP, 1)); PROTECT(tc = allocVector(INTSXP, 1)); PROTECT(td = allocVector(INTSXP, 1)); PROTECT(tn = allocVector(INTSXP, 1)); PROTECT(c = LCONS(f, CONS(ta, CONS(tb, CONS(tc, CONS(td, CONS(tn, p)))) ))); l = 0; for (j = 0; j < ny; j++) { if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { INTEGER(ta)[0] = INTEGER(tb)[0] = INTEGER(tc)[0] = INTEGER(tn)[0] = 0; for (k = 0; k < n; k++) { i0 = LOGICAL(x)[i + k * nx]; j0 = LOGICAL(y)[j + k * ny]; if (i0 == NA_LOGICAL || j0 == NA_LOGICAL) continue; INTEGER(ta)[0] += (i0 == TRUE && j0 == TRUE); INTEGER(tb)[0] += (i0 == TRUE && j0 == FALSE); INTEGER(tc)[0] += (i0 == FALSE && j0 == TRUE); INTEGER(tn)[0]++; } if (INTEGER(tn)[0] == 0) INTEGER(td)[0] = 0; else INTEGER(td)[0] = INTEGER(tn)[0] - INTEGER(ta)[0] - INTEGER(tb)[0] - INTEGER(tc)[0]; SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(7); return r; } /* compute auto- or cross-distances with a user-supplied * function given data.frame data. * * because of the details this is insane ... * * ceeboo 2007 */ static void setElement(SEXP x, int i, SEXP y) { switch (TYPEOF(x)) { case LGLSXP: LOGICAL(x)[0] = LOGICAL(y)[i]; break; case INTSXP: INTEGER(x)[0] = INTEGER(y)[i]; break; case REALSXP: REAL(x)[0] = REAL(y)[i]; break; case STRSXP: SET_STRING_ELT(x, 0, STRING_ELT(y, i)); break; case VECSXP: SET_VECTOR_ELT(x, 0, VECTOR_ELT(y, i)); break; default: error("type not implemented"); } } SEXP R_apply_dist_data_frame(SEXP p) { int i, j, k, l, nc, nx, ny, nz, m = 0; SEXP r, c, d, tx, ty, rx, ry; SEXP x, y, f; p = CDR(p); if (length(p) < 4) error("invalid number of arguments"); x = CAR(p); y = CADR(p); if (!inherits(x, "data.frame") || (!isNull(y) && !inherits(y, "data.frame"))) error("invalid data parameter(s)"); p = CDDR(p); d = CAR(p); if (TYPEOF(d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); nc = LENGTH(x); if (nc == 0) error("cannot handle empty data frames"); nx = ny = nz = LENGTH(VECTOR_ELT(x, 0)); if (isNull(y)) y = x; else { if (LENGTH(y) != nc) error("data parameters do not conform"); ny = LENGTH(VECTOR_ELT(y, 0)); for (k = 0; k < nc; k++) { if (TYPEOF(VECTOR_ELT(x, k)) != TYPEOF(VECTOR_ELT(y, k))) error("data parameters do not conform"); // sucks: the c code in identical.c is not // accessible. c = eval(PROTECT(LCONS(install("identical"), PROTECT( CONS(ATTRIB(VECTOR_ELT(x, k)), CONS(ATTRIB(VECTOR_ELT(y, k)), R_NilValue))))), R_GlobalEnv); UNPROTECT(2); if (LOGICAL(c)[0] == FALSE) error("attributes of data parameters do not conform"); } if (LOGICAL(d)[0] == TRUE) { if (nx != ny) error("the number of rows of 'x' and 'y' do not conform"); m = 2; } else m = 1; } // fixme: row.names if (m == 0) { PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); setAttrib(r, install("Labels"), PROTECT(coerceVector(PROTECT(getAttrib(x, install("row.names"))), STRSXP))); UNPROTECT(2); setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d; PROTECT(r = allocMatrix(REALSXP, nx, ny)); setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, coerceVector(PROTECT(getAttrib(x, install("row.names"))), STRSXP)); UNPROTECT(1); SET_VECTOR_ELT(d, 1, coerceVector(PROTECT(getAttrib(y, install("row.names"))), STRSXP)); UNPROTECT(1); } else PROTECT(r = allocVector(REALSXP, nx)); PROTECT(tx = allocVector(VECSXP, nc)); setAttrib(tx, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); setAttrib(tx, install("row.names"), PROTECT(rx = allocVector(INTSXP, 1))); UNPROTECT(1); setAttrib(tx, R_ClassSymbol, getAttrib(x, R_ClassSymbol)); PROTECT(ty = allocVector(VECSXP, nc)); setAttrib(ty, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); setAttrib(ty, install("row.names"), PROTECT(ry = allocVector(INTSXP, 1))); UNPROTECT(1); setAttrib(ty, R_ClassSymbol, getAttrib(x, R_ClassSymbol)); for (k = 0; k < nc; k++) { SEXP t, s; t = VECTOR_ELT(x, k); // fixme: should fail for S4 SET_VECTOR_ELT(tx, k, (s = allocVector(TYPEOF(t), 1))); SET_ATTRIB(s, ATTRIB(t)); // fixme: may be wrong SET_OBJECT(s, OBJECT(t)); SET_VECTOR_ELT(ty, k, (s = allocVector(TYPEOF(t), 1))); SET_ATTRIB(s, ATTRIB(t)); SET_OBJECT(s, OBJECT(t)); } PROTECT(c = LCONS(f, CONS(tx, CONS(ty, p)))); l = 0; for (j = 0; j < ny; j++) { for (k = 0; k < nc; k++) setElement(VECTOR_ELT(ty, k), j, VECTOR_ELT(y, k)); INTEGER(ry)[0] = j+1; // R index if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { for (k = 0; k < nc; k++) setElement(VECTOR_ELT(tx, k), i, VECTOR_ELT(x, k)); INTEGER(rx)[0] = i+1; SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(4); return r; } // proxy/src/distance.c0000755000175100001440000002513413222626722014212 0ustar hornikusers#include #include #include // extends the code from stats/src/distance.c // to handle auto- and cross-distances. // // note: the runtime in the symmetric case is // not always optimal. // // ceeboo 2007, 2008, 2009, 2014, 2016 #define both_non_NA(a,b) (!ISNAN(a) && !ISNAN(b)) #define both_FINITE(a,b) (R_FINITE(a) && R_FINITE(b)) typedef double (* DFUN)(double *, double *, int, int, int); static double dfp = 1; static double minkowski(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = (*x - *y); if (!ISNAN(dev)) { dist += R_pow(fabs(dev), dfp); count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return R_pow(dist, 1.0/dfp); } static double euclidean(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = (*x - *y); if (!ISNAN(dev)) { dist += dev * dev; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return sqrt(dist); } static double maximum(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = -DBL_MAX; for (j = 0 ; j < nc ; j++) { if (both_non_NA(*x, *y)) { dev = fabs(*x - *y); if (!ISNAN(dev)) { if (dev > dist) dist = dev; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; //if (count != nc) dist /= ((double)count/nc); return dist; } static double manhattan(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = fabs(*x - *y); if (!ISNAN(dev)) { dist += dev; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return dist; } // fixme: NA for two all-zero vectors static double canberra(double *x, double *y, int nx, int ny, int nc) { double dev, dist, sum, diff; int count, j; count = 0; dist = 0; for (j = 0 ;j < nc; j++) { if (both_non_NA(*x, *y)) { sum = fabs(*x + *y); diff = fabs(*x - *y); if (sum > DBL_MIN || diff > DBL_MIN) { dev = diff/sum; if (!ISNAN(dev) || (!R_FINITE(diff) && diff == sum && /* use Inf = lim x -> oo */ (dev = 1.))) { dist += dev; count++; } } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return dist; } // FIXME why treat not both finite as NA? static double binary(double *x, double *y, int nx, int ny, int nc) { int total, count, dist; int j; total = count = dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { if (*x || *y) { count++; if (!(*x && *y)) dist++; } total++; } x += nx; y += ny; } if (total == 0) return NA_REAL; if (count == 0) return 0; return (double) dist / count; } static double matching(double *x, double *y, int nx, int ny, int nc) { int total, count; int j; total = count = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { if (*x != *y) count++; total++; } x += nx; y += ny; } if (total == 0) return NA_REAL; if (count == 0) return 0; return (double) count / total; } static double fuzzy(double *x, double *y, int nx, int ny, int nc) { double dist, smax, smin; int count, j; count = 0; smax = smin = 0; for (j = 0; j < nc; j++) { if (both_FINITE(*x, *y)) { if (*x > *y) { smax += *x; smin += *y; } else { smax += *y; smin += *x; } count++; } x += nx; y += ny; } if (count == 0) return NA_REAL; if (!R_FINITE(smin)) return NA_REAL; dist = smin / smax; if (ISNAN(dist)) return 0; return 1-dist; } static double mutual(double *x, double *y, int nx, int ny, int nc) { double dist; int total, count, cx, cy, j; total = count = cx = cy = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { if (*x && *y) count++; cx += (*x && 1); cy += (*y && 1); total++; } x += nx; y += ny; } if (total == 0) return NA_REAL; if (cx == 0 || cy == 0 || cx == total || cy == total) return 0; dist = 0; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); cy = total - cy; count = cx - count; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); cx = total - cx; count = cy - count; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); cy = total - cy; count = cx - count; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); if (total != nc) dist /= ((double)total/nc); return dist ; } // wrapper static SEXP dists(SEXP R_x, SEXP R_y, SEXP R_d, DFUN f, SEXP R_p) { if (!isMatrix(R_x)) error("'x' not of class matrix"); if (!isNull(R_y) && !isMatrix(R_y)) error("'y' not of class matrix"); if (TYPEOF(R_d) != LGLSXP) error("'d' not of type logical"); int i, j, n, nx, ny, nc, m = 0; SEXP x = R_x, y = R_y, r; if (!isNull(R_p)) // fixme: check? dfp = *REAL(R_p); if (isNull(y)) y = x; else if (LOGICAL(R_d)[0] == TRUE) m = 2; else m = 1; // return matrix nc = INTEGER(GET_DIM(x))[1]; if (INTEGER(GET_DIM(y))[1] != nc) error("invalid number of columns"); nx = INTEGER(GET_DIM(x))[0]; ny = INTEGER(GET_DIM(y))[0]; if (m == 2 && nx != ny) error("invalid number of rows for pairwise mode"); if (TYPEOF(x) != REALSXP) { PROTECT(x = coerceVector(R_x, REALSXP)); if (isNull(R_y) || R_x == R_y) y = x; } if (TYPEOF(y) != REALSXP) PROTECT(y = coerceVector(R_y, REALSXP)); if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); if (!isNull(d = getAttrib(x, R_DimNamesSymbol))) setAttrib(r, install("Labels"), VECTOR_ELT(d, 0)); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); d1 = getAttrib(x, R_DimNamesSymbol); d2 = getAttrib(y, R_DimNamesSymbol); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, isNull(d1) ? d1 : VECTOR_ELT(d1, 0)); SET_VECTOR_ELT(d, 1, isNull(d2) ? d2 : VECTOR_ELT(d2, 0)); } } else PROTECT(r = allocVector(REALSXP, nx)); n = 0; for (j = 0; j < ny; j++) { if (m == 2) REAL(r)[n++] = f(REAL(x)+j, REAL(y)+j, nx, ny, nc); else for (i = (m == 0) ? j+1 : 0; i < nx; i++) REAL(r)[n++] = f(REAL(x)+i, REAL(y)+j, nx, ny, nc); R_CheckUserInterrupt(); } UNPROTECT(1); if (x != R_x) UNPROTECT(1); if (!isNull(R_y) && R_y != R_x && y != R_y) UNPROTECT(1); return r; } // R wrappers SEXP R_minkowski_dist(SEXP x, SEXP y, SEXP d, SEXP p) { if (isNull(p)) error("'p' invalid"); return dists(x, y, d, minkowski, p); } SEXP R_euclidean_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, euclidean, R_NilValue); } SEXP R_maximum_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, maximum, R_NilValue); } SEXP R_manhattan_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, manhattan, R_NilValue); } SEXP R_canberra_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, canberra, R_NilValue); } SEXP R_binary_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, binary, R_NilValue); } SEXP R_matching_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, matching, R_NilValue); } SEXP R_fuzzy_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, fuzzy, R_NilValue); } SEXP R_mutual_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, mutual, R_NilValue); } // 2017/10 SEXP R_bjaccard(SEXP x, SEXP y, SEXP d) { SEXP r = dists(x, y, d, binary, R_NilValue); for (int k = 0; k < LENGTH(r); k++) { double z = REAL(r)[k]; if (!ISNAN(z)) REAL(r)[k] = 1 - z; } return r; } static double ebinary(double *x, double *y, int nx, int ny, int nc) { double dev, prod, dist, xy; int count; int j; dist = xy = 0; count = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = (*x - *y); prod = *x * *y; if (!ISNAN(dev) && !ISNAN(prod)) { dist += dev * dev; xy += prod; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (!R_FINITE(xy)) return NA_REAL; dist = dist / dfp + xy; xy /= dist; if (ISNAN(xy)) if (dist < DBL_MIN) return 1; else return NA_REAL; return xy; } SEXP R_ess2(SEXP x, SEXP y, SEXP d) { dfp = .5; return dists(x, y, d, ebinary, R_NilValue); } SEXP R_ejaccard(SEXP x, SEXP y, SEXP d) { dfp = 1; return dists(x, y, d, ebinary, R_NilValue); } SEXP R_edice(SEXP x, SEXP y, SEXP d) { dfp = 2; return dists(x, y, d, ebinary, R_NilValue); } static double cosine(double *x, double *y, int nx, int ny, int nc) { double prod, xy, xx, yy; int count; int j; xy = xx = yy = 0; count = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { prod = *x * *y; if (!ISNAN(prod)) { xy += prod; xx += *x * *x; yy += *y * *y; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (!R_FINITE(xy)) return NA_REAL; xy /= sqrt(xx) * sqrt(yy); if (ISNAN(xy)) if (xx < DBL_MIN && yy < DBL_MIN) return 1; else if (xx < DBL_MIN || yy < DBL_MIN) return 0; else return NA_REAL; return xy; } SEXP R_cosine(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, cosine, R_NilValue); } // proxy/src/arrayIndex.c0000755000175100001440000001662112265347202014526 0ustar hornikusers #include #include // workaround i18n #define _(x) (x) // copied from 2.14-2 src/main/subscript.c // // ceeboo 2011/11 2014/1 // #define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy); static SEXP nullSubscript(int n) { int i; SEXP indx; indx = allocVector(INTSXP, n); for (i = 0; i < n; i++) INTEGER(indx)[i] = i + 1; return indx; } static SEXP logicalSubscript(SEXP s, int ns, int nx, int *stretch, SEXP call) { int canstretch, count, i, nmax; SEXP indx; canstretch = *stretch; if (!canstretch && ns > nx) { ECALL(call, _("(subscript) logical subscript too long")); } nmax = (ns > nx) ? ns : nx; *stretch = (ns > nx) ? ns : 0; if (ns == 0) return(allocVector(INTSXP, 0)); count = 0; for (i = 0; i < nmax; i++) if (LOGICAL(s)[i%ns]) count++; indx = allocVector(INTSXP, count); count = 0; for (i = 0; i < nmax; i++) if (LOGICAL(s)[i%ns]) { if (LOGICAL(s)[i%ns] == NA_LOGICAL) INTEGER(indx)[count++] = NA_INTEGER; else INTEGER(indx)[count++] = i + 1; } return indx; } static SEXP negativeSubscript(SEXP s, int ns, int nx, SEXP call) { SEXP indx; int stretch = 0; int i, ix; PROTECT(indx = allocVector(LGLSXP, nx)); for (i = 0; i < nx; i++) LOGICAL(indx)[i] = 1; for (i = 0; i < ns; i++) { ix = INTEGER(s)[i]; if (ix != 0 && ix != NA_INTEGER && -ix <= nx) LOGICAL(indx)[-ix - 1] = 0; } s = logicalSubscript(indx, nx, nx, &stretch, call); UNPROTECT(1); return s; } static SEXP positiveSubscript(SEXP s, int ns, int nx) { SEXP indx; int i, zct = 0; for (i = 0; i < ns; i++) { if (INTEGER(s)[i] == 0) zct++; } if (zct) { indx = allocVector(INTSXP, (ns - zct)); for (i = 0, zct = 0; i < ns; i++) if (INTEGER(s)[i] != 0) INTEGER(indx)[zct++] = INTEGER(s)[i]; return indx; } else return s; } static SEXP integerSubscript(SEXP s, int ns, int nx, int *stretch, SEXP call) { int i, ii, min, max, canstretch; Rboolean isna = FALSE; canstretch = *stretch; *stretch = 0; min = 0; max = 0; for (i = 0; i < ns; i++) { ii = INTEGER(s)[i]; if (ii != NA_INTEGER) { if (ii < min) min = ii; if (ii > max) max = ii; } else isna = TRUE; } if (max > nx) { if(canstretch) *stretch = max; else { ECALL(call, _("subscript out of bounds")); } } if (min < 0) { if (max == 0 && !isna) return negativeSubscript(s, ns, nx, call); else { ECALL(call, _("only 0's may be mixed with negative subscripts")); } } else return positiveSubscript(s, ns, nx); return R_NilValue; } /* This uses a couple of horrible hacks in conjunction with * VectorAssign (in subassign.c). If subscripting is used for * assignment, it is possible to extend a vector by supplying new * names, and we want to give the extended vector those names, so they * are returned as the use.names attribute. Also, unset elements of the vector * of new names (places where a match was found) are indicated by * setting the element of the newnames vector to NULL. */ /* The original code (pre 2.0.0) used a ns x nx loop that was too * slow. So now we hash. Hashing is expensive on memory (up to 32nx * bytes) so it is only worth doing if ns * nx is large. If nx is * large, then it will be too slow unless ns is very small. */ static SEXP stringSubscript(SEXP s, int ns, int nx, SEXP names, int *stretch, Rboolean in, SEXP call) { SEXP indx, indexnames; int i, j, nnames, sub, extra; int canstretch = *stretch; /* product may overflow, so check factors as well. */ Rboolean usehashing = in && ( ((ns > 1000 && nx) || (nx > 1000 && ns)) || (ns * nx > 15*nx + ns) ); PROTECT(s); PROTECT(names); PROTECT(indexnames = allocVector(VECSXP, ns)); nnames = nx; extra = nnames; /* Process each of the subscripts. First we compare with the names * on the vector and then (if there is no match) with each of the * previous subscripts, since (if assigning) we may have already * added an element of that name. (If we are not assigning, any * nonmatch will have given an error.) */ if(usehashing) { /* must be internal, so names contains a character vector */ /* NB: this does not behave in the same way with respect to "" and NA names: they will match */ PROTECT(indx = match(names, s, 0)); /* second pass to correct this */ for (i = 0; i < ns; i++) if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0]) INTEGER(indx)[i] = 0; for (i = 0; i < ns; i++) SET_VECTOR_ELT(indexnames, i, R_NilValue); } else { PROTECT(indx = allocVector(INTSXP, ns)); for (i = 0; i < ns; i++) { sub = 0; if (names != R_NilValue) { for (j = 0; j < nnames; j++) { SEXP names_j = STRING_ELT(names, j); if (!in && TYPEOF(names_j) != CHARSXP) { ECALL(call, _("character vector element does not have type CHARSXP")); } if (NonNullStringMatch(STRING_ELT(s, i), names_j)) { sub = j + 1; SET_VECTOR_ELT(indexnames, i, R_NilValue); break; } } } INTEGER(indx)[i] = sub; } } for (i = 0; i < ns; i++) { sub = INTEGER(indx)[i]; if (sub == 0) { for (j = 0 ; j < i ; j++) if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) { sub = INTEGER(indx)[j]; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j)); break; } } if (sub == 0) { if (!canstretch) { ECALL(call, _("subscript out of bounds")); } extra += 1; sub = extra; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, i)); } INTEGER(indx)[i] = sub; } /* We return the new names as the names attribute of the returned subscript vector. */ if (extra != nnames) setAttrib(indx, install("use.names"), indexnames); if (canstretch) *stretch = extra; UNPROTECT(4); return indx; } /* Array Subscripts. dim is the dimension (0 to k-1) s is the subscript list, dn is the attribute name of dim dnn is the attribute name of dimnames x is the array to be subscripted. */ SEXP _int_array_subscript(int dim, SEXP s, const char *dn, const char *dnn, SEXP x, Rboolean in, SEXP call) { int nd, ns, stretch = 0; SEXP dnames, tmp; ns = LENGTH(s); nd = INTEGER(getAttrib(x, install(dn)))[dim]; switch (TYPEOF(s)) { case NILSXP: return allocVector(INTSXP, 0); case LGLSXP: return logicalSubscript(s, ns, nd, &stretch, call); case INTSXP: return integerSubscript(s, ns, nd, &stretch, call); case REALSXP: PROTECT(tmp = coerceVector(s, INTSXP)); tmp = integerSubscript(tmp, ns, nd, &stretch, call); UNPROTECT(1); return tmp; case STRSXP: dnames = getAttrib(x, install(dnn)); if (dnames == R_NilValue) { ECALL(call, _("no 'dimnames' attribute for array")); } dnames = VECTOR_ELT(dnames, dim); return stringSubscript(s, ns, nd, dnames, &stretch, in, call); case SYMSXP: if (s == R_MissingArg) return nullSubscript(nd); default: if (call == R_NilValue) error(_("invalid subscript type '%s'"), type2char(TYPEOF(s))); else errorcall(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return R_NilValue; } // R interface SEXP R_arraySubscript(SEXP x, SEXP dim, SEXP s, SEXP dn, SEXP dnn) { // FIXME return _int_array_subscript(INTEGER(dim)[0], s, (const char *) CHAR(STRING_ELT(dn, 0)), (const char *) CHAR(STRING_ELT(dnn, 0)), x, TRUE, R_NilValue); } // proxy/src/dll.c0000755000175100001440000000473613044065103013170 0ustar hornikusers #include #include #include extern SEXP R_minkowski_dist(SEXP x, SEXP y, SEXP d, SEXP p); extern SEXP R_euclidean_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_maximum_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_manhattan_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_canberra_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_binary_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_matching_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_fuzzy_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_mutual_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_bjaccard(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_ejaccard(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_edice(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_cosine(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_subset_dist(SEXP R_x, SEXP s); extern SEXP R_rowSums_dist(SEXP R_x, SEXP na_rm); extern SEXP R_row_dist(SEXP x, SEXP col); extern SEXP R_apply_dist_matrix(SEXP p); extern SEXP R_apply_dist_list(SEXP p); extern SEXP R_apply_dist_binary_matrix(SEXP p); extern SEXP R_apply_dist_data_frame(SEXP p); static const R_CallMethodDef CallEntries[] = { {"R_minkowski_dist", (DL_FUNC) R_minkowski_dist, 4}, {"R_euclidean_dist", (DL_FUNC) R_euclidean_dist, 3}, {"R_maximum_dist", (DL_FUNC) R_maximum_dist, 3}, {"R_manhattan_dist", (DL_FUNC) R_manhattan_dist, 3}, {"R_canberra_dist", (DL_FUNC) R_canberra_dist, 3}, {"R_binary_dist", (DL_FUNC) R_binary_dist, 3}, // {"R_matching_dist", (DL_FUNC) R_matching_dist, 3}, {"R_fuzzy_dist", (DL_FUNC) R_fuzzy_dist, 3}, // {"R_mutual_dist", (DL_FUNC) R_mutual_dist, 3}, {"R_bjaccard", (DL_FUNC) R_bjaccard, 3}, {"R_ejaccard", (DL_FUNC) R_ejaccard, 3}, {"R_edice", (DL_FUNC) R_edice, 3}, {"R_cosine", (DL_FUNC) R_cosine, 3}, {"R_subset_dist", (DL_FUNC) R_subset_dist, 2}, {"R_rowSums_dist", (DL_FUNC) R_rowSums_dist, 2}, {"R_row_dist", (DL_FUNC) R_row_dist, 2}, {NULL, NULL, 0} }; static const R_ExternalMethodDef ExternalEntries[] = { {"R_apply_dist_matrix", (DL_FUNC) R_apply_dist_matrix, -1}, {"R_apply_dist_list", (DL_FUNC) R_apply_dist_list, -1}, {"R_apply_dist_binary_matrix", (DL_FUNC) R_apply_dist_binary_matrix, -1}, {"R_apply_dist_data_frame", (DL_FUNC) R_apply_dist_data_frame, -1}, {NULL, NULL, 0} }; void R_init_proxy(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, ExternalEntries); R_useDynamicSymbols(dll, FALSE); } proxy/vignettes/0000755000175100001440000000000014250213102013447 5ustar hornikusersproxy/vignettes/overview.Rnw0000755000175100001440000000145711714440422016030 0ustar hornikusers\documentclass{article} \title{Proximity measures in the \texttt{proxy} package for {\sf R}} \author{David Meyer} %\VignetteIndexEntry{Proximity measures in the proxy package for R} %\VignetteDepends{proxy} %\VignetteKeywords{similarities,dissimilarities,distance} %\VignettePackage{proxy} <>= library(proxy) x <- summary(pr_DB, "long") FUN <- function(index) { for (i in which(index)) { writeLines(sprintf("Aliases: %s", paste(x$names[[i]], collapse = ", "))) writeLines(sprintf("Type : %s", x$type[i])) writeLines(sprintf("Formula: %s\n", x$formula[i])) } } @ \begin{document} \maketitle \section{Similarities} <>= FUN(x$distance == FALSE) @ \section{Dissimilarities} <>= FUN(x$distance == TRUE) @ \end{document} proxy/R/0000755000175100001440000000000014250213102011640 5ustar hornikusersproxy/R/util.R0000755000175100001440000000254213037347321012763 0ustar hornikusers ## wrappers for class dist ## ## note that all type checking and coercing ## is now done in C, as well as handling of ## attributes. ## ## fixme: create generic functions? ## ## ceeboo 2007 dim.dist <- function(x) rep.int(attr(x, "Size"), 2) # works with nrow and ncol dimnames.dist <- names.dist <- function(x) attr(x, "Labels") "dimnames<-.dist" <- "names<-.dist" <- function(x, value) { if (is.null(value)) attr(x, "Labels") <- NULL else { if (length(value) != attr(x, "Size")) stop("dimension of 'x' and length of 'value' do not conform") attr(x, "Labels") <- as.character(value) } x } row.dist <- function(x) .Call(R_row_dist, x, FALSE) col.dist <- function(x) .Call(R_row_dist, x, TRUE) ## subset.dist <- "[[.dist" <- function(x, subset, ...) { if (missing(subset)) return(x) .Call(R_subset_dist, x, unique(subset)) } ## rowSums.dist <- colSums.dist <- function(x, na.rm = FALSE) .Call(R_rowSums_dist, x, na.rm) ## rowMeans.dist <- colMeans.dist <- function(x, na.rm = FALSE, diag = TRUE) { if (!is.logical(diag)) stop("'diag' not of type logical") s <- rowSums.dist(x, na.rm) if (na.rm) { x[!(is.na(x) | is.nan(x))] <- 1 s / (rowSums.dist(x, na.rm) + (diag == TRUE)) } else s / (length(s) - (diag == FALSE)) } ### proxy/R/similarities.R0000644000175100001440000006405714246417210014510 0ustar hornikusers### Binary measures pr_Jaccard <- function(a, b, c, d, n) a / (n - d) pr_Jaccard_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$abcd <- TRUE reg_entry$FUN <- "pr_Jaccard" } else { storage.mode(x) <- "logical" if (!is.null(y)) storage.mode(y) <- "logical" } list(x = x, y = y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_bjaccard", names = c("Jaccard","binary","Reyssac","Roux"), distance = FALSE, PREFUN = "pr_Jaccard_prefun", convert = "pr_simil2dist", type = "binary", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "a / (a + b + c)", reference = "Jaccard, P. (1908). Nouvelles recherches sur la distribution florale. Bull. Soc. Vaud. Sci. Nat., 44, pp. 223--270.", description = "The Jaccard Similarity (C implementation) for binary data. It is the proportion of (TRUE, TRUE) pairs, but not considering (FALSE, FALSE) pairs. So it compares the intersection with the union of object sets.") pr_Kulczynski1 <- function(a, b, c, d, n) a / (b + c) pr_DB$set_entry(FUN = "pr_Kulczynski1", names = "Kulczynski1", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / (b + c)", reference = "Kurzcynski, T.W. (1970). Generalized distance and discrete variables. Biometrics, 26, pp. 525--534.", description = "Kulczynski Similarity for binary data. Relates the (TRUE, TRUE) pairs to discordant pairs.") pr_Kulczynski2 <- function(a, b, c, d, n) (a / (a + b) + a / (a + c)) / 2 pr_DB$set_entry(FUN = "pr_Kulczynski2", names = "Kulczynski2", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "[a / (a + b) + a / (a + c)] / 2", reference = "Kurzcynski, T.W. (1970). Generalized distance and discrete variables. Biometrics, 26, pp. 525--534.", description = "Kulczynski Similarity for binary data. Relates the (TRUE, TRUE) pairs to the discordant pairs.") pr_Mountford <- function(a, b, c, d, n) 2 * a / (a * (b + c) + 2 * b * c) pr_DB$set_entry(FUN = "pr_Mountford", names = "Mountford", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "2a / (ab + ac + 2bc)", reference = "Mountford, M.D. (1962). An index of similarity and its application to classificatory probems. In P.W. Murphy (ed.), Progress in Soil Zoology, pp. 43--50. Butterworth, London.", description = "The Mountford Similarity for binary data.") pr_fagerMcgowan <- function(a, b, c, d, n) a / sqrt((a + b) * (a + c)) - sqrt(a + c) / 2 pr_DB$set_entry(FUN = "pr_fagerMcgowan", names = c("Fager", "McGowan"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / sqrt((a + b)(a + c)) - sqrt(a + c) / 2", reference = "Fager, E. W. and McGowan, J. A. (1963). Zooplankton species groups in the North Pacific. Science, N. Y. 140: 453-460", description = "The Fager / McGowan distance.") pr_RusselRao <- function(a, b, c, d, n) a / n pr_DB$set_entry(FUN = "pr_RusselRao", names = c("Russel","Rao"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / n", reference = "Russell, P.F., and Rao T.R. (1940). On habitat and association of species of anopheline larvae in southeastern, Madras, J. Malaria Inst. India 3, pp. 153--178", description = "The Russel/Rao Similarity for binary data. It is just the proportion of (TRUE, TRUE) pairs.") pr_SimpleMatching <- function(a, b, c, d, n) (a + d) / n pr_DB$set_entry(FUN = "pr_SimpleMatching", names = c("simple matching", "Sokal/Michener"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(a + d) / n", reference = "Sokal, R.R., and Michener, C.D. (1958). A statistical method for evaluating systematic relationships. Univ. Kansas Sci. Bull., 39, pp. 1409--1438.", description = "The Simple Matching Similarity or binary data. It is the proportion of concordant pairs.") pr_Hamman <- function(a, b, c, d, n) (a + d - b - c) / n pr_DB$set_entry(FUN = "pr_Hamman", names = "Hamman", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "([a + d] - [b + c]) / n", reference = "Hamann, U. (1961). Merkmalbestand und Verwandtschaftsbeziehungen der Farinosae. Ein Beitrag zum System der Monokotyledonen. Willdenowia, 2, pp. 639-768.", description = "The Hamman Matching Similarity for binary data. It is the proportion difference of the concordant and discordant pairs.") pr_Faith <- function(a, b, c, d, n) (a + d / 2) / n pr_DB$set_entry(FUN = "pr_Faith", names = "Faith", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(a + d/2) / n", reference = "Belbin, L., Marshall, C. & Faith, D.P. (1983). Representing relationships by automatic assignment of colour. The Australian Computing Journal 15, 160-163.", description = "The Faith similarity") pr_RogersTanimoto <- function(a, b, c, d, n) (a + d) / (a + 2 * (b + c) + d) pr_DB$set_entry(FUN = "pr_RogersTanimoto", names = c("Tanimoto", "Rogers"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(a + d) / (a + 2b + 2c + d)", reference = "Rogers, D.J, and Tanimoto, T.T. (1960). A computer program for classifying plants. Science, 132, pp. 1115--1118.", description = "The Rogers/Tanimoto Similarity for binary data. Similar to the simple matching coefficient, but putting double weight on the discordant pairs.") pr_Dice <- function(a, b, c, d, n) 2 * a / (2 * a + b + c) pr_DB$set_entry(FUN = "pr_Dice", names = c("Dice", "Czekanowski", "Sorensen"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "2a / (2a + b + c)", reference = "Dice, L.R. (1945). Measures of the amount of ecologic association between species. Ecolology, 26, pp. 297--302.", description = "The Dice Similarity") pr_Phi <- function(a, b, c, d, n) (a * d - b * c) / (sqrt(a + b) * sqrt(c + d) * sqrt(a + c) * sqrt(b + d)) pr_DB$set_entry(FUN = "pr_Phi", names = "Phi", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(ad - bc) / sqrt[(a + b)(c + d)(a + c)(b + d)]", reference = "Sokal, R.R, and Sneath, P.H.A. (1963). Principles of numerical taxonomy. W.H. Freeman and Company, San Francisco.", description = "The Phi Similarity (= Product-Moment-Correlation for binary variables)") pr_Stiles <- function(a, b, c, d, n) log(n) + 2 * log(abs(a * d - b * c) - 0.5 * n) - log(a + b) - log(c + d) - log(a + c) - log(b + d) pr_DB$set_entry(FUN = "pr_Stiles", names = "Stiles", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "log(n(|ad-bc| - 0.5n)^2 / [(a + b)(c + d)(a + c)(b + d)])", reference = "Stiles, H.E. (1961). The association factor in information retrieval. Communictions of the ACM, 8, 1, pp. 271--279.", description = "The Stiles Similarity (used for information retrieval). Identical to the logarithm of Krylov's distance.") pr_Michael <- function(a, b, c, d, n) 4 * (a * d - b * c) / ((a + d) * (a + d) + (b + c) * (b + c)) pr_DB$set_entry(FUN = "pr_Michael", names = "Michael", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "4(ad - bc) / [(a + d)^2 + (b + c)^2]", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Michael Similarity") pr_MozleyMargalef <- function(a, b, c, d, n) a * n / ((a + b) * (a + c)) pr_DB$set_entry(FUN = "pr_MozleyMargalef", names = c("Mozley","Margalef"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "an / (a + b)(a + c)", reference = "Margalef, D.R. (1958). Information theory in ecology. Gen. Systems, 3, pp. 36--71.", description = "The Mozley/Margalef Similarity") pr_Yule<- function(a, b, c, d, n) { ad <- a * d bc <- b * c (ad - bc) / (ad + bc) } pr_DB$set_entry(FUN = "pr_Yule", names = "Yule", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(ad - bc) / (ad + bc)", reference = "Yule, G.U. (1912). On measuring associations between attributes. J. Roy. Stat. Soc., 75, pp. 579--642.", description = "Yule Similarity") pr_Yule2<- function(a, b, c, d, n) { ad <- a * d bc <- b * c (sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc)) } pr_DB$set_entry(FUN = "pr_Yule2", names = "Yule2", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc))", reference = "Yule, G.U. (1912). On measuring associations between attributes. J. Roy. Stat. Soc., 75, pp. 579--642.", description = "Yule Similarity") pr_Ochiai <- function(a, b, c, d, n) a / sqrt((a + b) * (a + c)) pr_DB$set_entry(FUN = "pr_Ochiai", names = "Ochiai", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / sqrt[(a + b)(a + c)]", reference = "Sokal, R.R, and Sneath, P.H.A. (1963). Principles of numerical taxonomy. W.H. Freeman and Company, San Francisco.", description = "The Ochiai Similarity") pr_Simpson <- function(a, b, c, d, n) a / min((a + b), (a + c)) pr_DB$set_entry(FUN = "pr_Simpson", names = "Simpson", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / min{(a + b), (a + c)}", reference = "Simpson, G.G. (1960). Notes on the measurement of faunal resemblance. American Journal of Science 258-A: 300-311.", description = "The Simpson Similarity (used in Zoology).") pr_BraunBlanquet <- function(a, b, c, d, n) a / max((a + b), (a + c)) pr_DB$set_entry(FUN = "pr_BraunBlanquet", names = c("Braun-Blanquet"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / max{(a + b), (a + c)}", reference = "Braun-Blanquet, J. (1964): Pflanzensoziologie. Springer Verlag, Wien and New York.", description = "The Braun-Blanquet Similarity (used in Biology).") pr_cos <- function(x, y) crossprod(x, y) / sqrt(crossprod(x) * crossprod(y)) pr_cos_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_cos" } list(x = x, y = y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_cosine", names = "cosine", PREFUN = "pr_cos_prefun", distance = FALSE, convert = function (x) 1 - x, type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "xy / sqrt(xx * yy)", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "The cos Similarity (C implementation)") pr_angular <- function(x, y) 1 - acos(crossprod(x, y) / sqrt(crossprod(x) * crossprod(y))) / pi pr_DB$set_entry(FUN = pr_angular, names = "angular", distance = FALSE, convert = function (x) 1 - x, type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "1 - acos(xy / sqrt(xx * yy)) / pi", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "The angular similarity") pr_eJaccard <- function(x, y) { tmp <- crossprod(x, y) tmp / (crossprod(x) + crossprod(y) - tmp) } pr_eJaccard_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_eJaccard" } list(x = 0 + x, y = if (!is.null(y)) 0 + y else NULL, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_ejaccard", names = c("eJaccard", "extended_Jaccard"), PREFUN = "pr_eJaccard_prefun", distance = FALSE, convert = "pr_simil2dist", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "xy / (xx + yy - xy)", reference = "Strehl A. and Ghosh J. (2000). Value-based customer grouping from large retail data-sets. In Proc. SPIE Conference on Data Mining and Knowledge Discovery, Orlando, volume 4057, pages 33-42. SPIE.", description = "The extended Jaccard Similarity (C implementation; yields Jaccard for binary x,y).") pr_eDice <- function(x, y) { tmp <- crossprod(x, y) tmp / (crossprod(x) + crossprod(y)) } pr_eDice_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_eDice" } list(x = 0 + x, y = if (!is.null(y)) 0 + y else NULL, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_edice", names = c("eDice", "extended_Dice","eSorensen"), PREFUN = "pr_eDice_prefun", distance = FALSE, convert = "pr_simil2dist", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "2xy / (xx + yy)", ## FIXME reference = "Alexander Strehl. Relationship-based Clustering and Cluster Ensembles for High-dimensional Data Mining. PhD thesis, The University of Texas at Austin, May 2002.", description = "The extended Dice Similarity (C implementation; yields Dice for binary x,y).") pr_cor <- function(x, y) { X <- x - mean(x) Y <- y - mean(y) crossprod(X, Y) / sqrt(crossprod(X) * crossprod(Y)) } pr_DB$set_entry(FUN = "pr_cor", names = "correlation", distance = FALSE, convert = "pr_simil2dist", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "xy / sqrt(xx * yy) for centered x,y", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "correlation (taking n instead of n-1 for the variance)") pr_ChiSquared <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sum((tab - exp) ^ 2 / exp) } pr_DB$set_entry(FUN = "pr_ChiSquared", names = "Chi-squared", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_ij (o_i - e_i)^2 / e_i", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "Sum of standardized squared deviations from observed and expected values in a cross-tab for x and y.") pr_PhiSquared <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sum((tab - exp) ^ 2 / exp) / sum(tab) } pr_DB$set_entry(FUN = "pr_PhiSquared", names = "Phi-squared", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "[sum_ij (o_i - e_i)^2 / e_i] / n", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "Standardized Chi-Squared (= Chi / n).") pr_Tschuprow <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sqrt(sum((tab - exp) ^ 2 / exp) / sum(tab) / sqrt((nrow(tab) - 1) * (ncol(tab) - 1))) } pr_DB$set_entry(FUN = "pr_Tschuprow", names = "Tschuprow", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt{[sum_ij (o_i - e_i)^2 / e_i] / n / sqrt((p - 1)(q - 1))}", reference = "Tschuprow, A.A. (1925). Grundbegriffe und Grundprobleme der Korrelationstheorie. Springer.", description = "Tschuprow-standardization of Chi-Squared.") pr_Cramer <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sqrt(sum((tab - exp) ^ 2 / exp) / sum(tab) / min((nrow(tab) - 1), (ncol(tab) - 1))) } pr_DB$set_entry(FUN = "pr_Cramer", names = "Cramer", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt{[Chi / n)] / min[(p - 1), (q - 1)]}", reference = "Cramer, H. (1946). The elements of probability theory and some of its applications. Wiley, New York. ", description = "Cramer-standization of Chi-Squared.") pr_Pearson <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) Chi <- sum((tab - exp) ^ 2 / exp) sqrt(Chi / (sum(tab) + Chi)) } pr_DB$set_entry(FUN = "pr_Pearson", names = c("Pearson","contingency"), distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt{Chi / (n + Chi)}", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "Contingency Coefficient. Chi is the Chi-Squared statistic.") pr_Gower <- function(x, y, l = NA, f = NA, m = NA, weights = NA) { ## prepare vectors len <- length(x) d <- logical(len) s <- double(len) ## compute scores groupwise: ## logical if (any(l)) { s[l] <- x[l] & y[l] d[l] <- x[l] | y[l] } ## factor if (any(f)) { s[f] <- x[f] == y[f] d[f] <- TRUE } ## metric if (any(m)) { s[m] <- 1 - abs(as.double(x[m]) - as.double(y[m])) d[m] <- TRUE } ## do not count missings d[is.na(s)] <- FALSE s[is.na(s)] <- 0 drop(crossprod(s, weights)) / drop(crossprod(d, weights)) } pr_Gower_prefun <- function(x, y, pairwise, p, reg_entry) { ## transform x and y x <- as.data.frame(x) if (!is.null(y)) y <- as.data.frame(y) ## determine types l <- sapply(x, is.logical) f <- sapply(x, is.factor) o <- sapply(x, is.ordered) f <- f & !o m <- !(l | f | o) ## Transform ordinal variables if (any(o)) { ##FIXME: Gower uses ranks, but daisy just uses internal codes?? x[o] <- lapply(x[o], as.integer) x[o] <- lapply(x[o], function(i) (i - 1) / (max(i) - 1)) if (!is.null(y)) { y[o] <- lapply(y[o], as.integer) y[o] <- lapply(y[o], function(i) (i - 1) / (max(i) - 1)) } m <- m | o } ## scale metric types RANGE <- function(x, y = NULL) { ## compute scale MAX <- sapply(x, max, na.rm = TRUE) MIN <- sapply(x, min, na.rm = TRUE) if (!is.null(y)) { MAX <- pmax(MAX, sapply(y, max, na.rm = TRUE)) MIN <- pmin(MIN, sapply(y, min, na.rm = TRUE)) } ret <- MAX - MIN ## do not scale when range == 0 ret[ret == 0] <- 1 ret } if (any(m)) { if (!is.null(p$ranges) && is.null(p$ranges.x)) p$ranges.x <- p$ranges r <- if(is.null(p$ranges.x)) RANGE(x[m], y[m]) else rep_len(p$ranges.x, length.out = sum(m)) if (!is.null(p$min) && is.null(p$min.x)) p$min.x <- p$min MIN <- if (is.null(p$min.x)) { if (is.null(y)) sapply(x[m], min, na.rm = TRUE) else pmin(sapply(x[m], min, na.rm = TRUE), sapply(y[m], min, na.rm = TRUE)) } else rep_len(p$min.x, length.out = sum(m)) x[m] <- sweep(sweep(x[m], 2, MIN), 2, r, FUN = "/") if (!is.null(y)) { if (!is.null(p$min) && is.null(p$min.y)) p$min.y <- p$min if (!is.null(p$min.y)) MIN <- rep_len(p$min.y, length.out = sum(m)) if (!is.null(p$ranges) && is.null(p$ranges.y)) p$ranges.y <- p$ranges if (!is.null(p$ranges.y)) r <- rep_len(p$ranges.y, length.out = sum(m)) y[m] <- sweep(sweep(y[m], 2, MIN), 2, r, FUN = "/") } } ## weights weights <- rep_len(if (is.null(p$weights)) 1 else p$weights, length.out = ncol(x)) p <- list(l = l, f = f, m = m, weights = weights) list(x = x, y = y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "pr_Gower", names = "Gower", PREFUN = "pr_Gower_prefun", distance = FALSE, convert = "pr_simil2dist", type = NA, loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "Sum_k (s_ijk * w_k) / Sum_k (d_ijk * w_k)", reference = "Gower, J.C. (1971). A general coefficient of similarity and some of its properties. Biometrics, 27, pp. 857--871.", description = "The Gower Similarity for mixed variable types. w_k are variable weights. d_ijk is 0 for missings or a pair of FALSE logicals, and 1 else. s_ijk is 1 for a pair of TRUE logicals or matching factor levels, and the absolute difference for metric variables. Each metric variable is scaled with its corresponding range, provided the latter is not 0. Ordinal variables are converted to ranks r_i and the scores z_i = (r_i - 1) / (max r_i - 1) are taken as metric variables. Note that in the latter case, unlike the definition of Gower, just the internal integer codes are taken as the ranks, and not what rank() would return. This is for compatibility with daisy() of the cluster package, and will make a slight difference in case of ties. The weights w_k can be specified by passing a numeric vector (recycled as needed) to the 'weights' argument. Ranges (minimum) for scaling the columns of x and y can be specified using the 'ranges.x'/'ranges.y' ('min.x' / 'min.y') arguments (or simply 'ranges' ('min') for both x and y). In case of cross-proximities, if not specified via these arguments, both data frames are standardized together.") proxy/R/dissimilarities.R0000755000175100001440000004071113042403627015202 0ustar hornikuserspr_Euclidean <- function(x, y) sqrt(crossprod(x - y)) pr_Euclidean_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Euclidean" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_euclidean_dist", names = c("Euclidean","L2"), PREFUN = "pr_Euclidean_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sqrt(sum_i (x_i - y_i)^2))", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Euclidean Distance (C implementation with compensation for excluded components)") pr_Mahalanobis <- function(x, y, cov) sqrt(mahalanobis(x, y, cov)) pr_Mahalanobis_prefun <- function(x, y, pairwise, p, reg_entry) { if (length(p) < 1) p <- list(cov(x, y)) list(x = x, y = y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "pr_Mahalanobis", names = "Mahalanobis", PREFUN = "pr_Mahalanobis_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt((x - y) Sigma^(-1) (x - y))", reference = "Mahalanobis P.C. (1936), On the generalised distance in statistics, Proceedings of the National Institute of Science of India 12, pp. 49-55", description = "The Mahalanobis Distance. The Variance-Covariance-Matrix is estimated from the input data if unspecified.") pr_Bhjattacharyya <- function(x, y) sqrt(crossprod(sqrt(x) - sqrt(y))) pr_DB$set_entry(FUN = "pr_Bhjattacharyya", names = "Bhjattacharyya", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt(sum_i (sqrt(x_i) - sqrt(y_i))^2))", reference = "Bhattacharyya A. (1943). On a measure of divergence between two statistical populations defined by probability distributions, Bull. Calcutta Math. Soc., vol. 35, pp. 99--109", description = "The Bhjattacharyya Distance") pr_Manhattan <- function(x, y) sum(abs(x - y)) pr_Manhattan_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Manhattan" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_manhattan_dist", names = c("Manhattan", "City-Block", "L1", "taxi"), PREFUN = "pr_Manhattan_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sum_i |x_i - y_i|", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Manhattan/City-Block/Taxi/L1-Distance (C implementation with compensation for excluded components)") pr_supremum <- function(x, y) max(abs(x - y)) pr_supremum_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_supremum" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_maximum_dist", names = c("supremum", "max", "maximum", "Tschebyscheff", "Chebyshev"), distance = TRUE, PREFUN = "pr_supremum_prefun", convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "max_i |x_i - y_i|", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Maximum/Supremum/Chebyshev Distance (C implementation)") pr_Minkowski <- function(x, y, p = 2) (sum(abs(x - y) ^ p)) ^ (1/p) pr_Minkowski_prefun <- function(x, y, pairwise, p, reg_entry) { if (length(p$p) < 1) stop("Argument 'p' mandatory!") p <- p$p[[1L]] if (p <= 0) stop("'p' must not be smaller than 0.") if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Minkowski" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_minkowski_dist", names = c("Minkowski","Lp"), PREFUN = "pr_Minkowski_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "(sum_i (x_i - y_i)^p)^(1/p)", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Minkowski Distance (C implementation with compensation for excluded components)") pr_Canberra <- function(x, y) {tmp <- abs(x - y) / abs(x + y); sum(tmp[!is.nan(tmp)])} pr_Canberra_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Canberra" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_canberra_dist", names = "Canberra", PREFUN = "pr_Canberra_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sum_i |x_i - y_i| / |x_i + y_i|", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Canberra Distance (C implementation with compensation for excluded components)") pr_WaveHedges <- function(x, y) sum(1 - min(x, y) / max(x, y)) pr_DB$set_entry(FUN = "pr_WaveHedges", names = c("Wave", "Hedges"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i (1 - min(x_i, y_i) / max(x_i, y_i))", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Wave/Hedges Distance") pr_Divergence <- function(x, y) {tmp <- (x - y)^2 / (x + y)^2; sum(tmp[!is.nan(tmp)])} pr_DB$set_entry(FUN = "pr_Divergence", names = "divergence", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i (x_i - y_i)^2 / (x_i + y_i)^2", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Divergence Distance") pr_KullbackLeibler <- function(x,y) { p <- x / sum(x); q <- y / sum(y); sum(p * log(p / q)) } pr_DB$set_entry(FUN = "pr_KullbackLeibler", names = c("Kullback", "Leibler"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i [x_i * log((x_i / sum_j x_j) / (y_i / sum_j y_j)) / sum_j x_j)]", reference = "Kullback S., and Leibler, R.A. (1951). On information and sufficiency. The Annals of Mathematical Statistics, vol. 22, pp. 79--86", description = "The Kullback-Leibler-distance.") pr_BrayCurtis <- function(x, y) sum(abs(x - y)) / sum(x + y) pr_DB$set_entry(FUN = "pr_BrayCurtis", names = c("Bray","Curtis"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i |x_i - y_i| / sum_i (x_i + y_i)", reference = "Bray J.R., Curtis J.T. (1957). An ordination of the upland forest of the southern Winsconsin. Ecological Monographies, 27, pp. 325--349", description = "The Bray/Curtis dissimilarity. Note that it is not a distance since it vioalates the triangle inequality.") pr_Soergel <- function(x, y) sum(abs(x - y)) / sum(max(x, y)) pr_DB$set_entry(FUN = "pr_Soergel", names = "Soergel", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i |x_i - y_i| / sum_i max{x_i, y_i}", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Soergel Distance") pr_Levenshtein_prefun <- function(x, y, pairwise, p, reg_entry) { if (system.file(package="cba") == "") stop("Need package 'cba'!") else loadNamespace("cba") if (pairwise) stop("Pairwise distances not implemented by sdist()!") list(x = x, y = y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "sdists", names = "Levenshtein", PREFUN = "pr_Levenshtein_prefun", convert = "pr_dist2simil", distance = TRUE, loop = FALSE, abcd = FALSE, C_FUN = FALSE, PACKAGE = "cba", formula = "Number of insertions, edits, and deletions between to strings", reference = "Levenshtein V.I. (1966). Binary codes capable of correcting deletions, insertions, and reversals. Soviet Physics Doklady 10, pp. 707--710", description = "Wrapper for sdists() in the cba-package (C implementation).") pr_Podani <- function(x, y) { a <- b <- c <- d <- 0 n <- length(x) for (i in seq_len(n - 1)) for(j in (i+1):n) { a <- a + (x[i] < x[j] && y[i] < y[j] || x[i] > x[j] && y[i] > y[j]) b <- b + (x[i] < x[j] && y[i] > y[j] || x[i] > x[j] && y[i] < y[j]) c <- c + (x[i] == x[j] && y[i] == y[j] && (x[i] == 0 && y[i] == 0 || x[i] > 0 && y[i] > 0)) z <- sum(x[i] == 0, x[j] == 0, y[i] == 0, y[j] == 0) d <- d + ((x[i] == x[j] || y[i] == y[j]) && z > 0 && z < 4) } 1 - 2 * (a - b + c - d) / (n * (n - 1)) } pr_DB$set_entry(FUN = "pr_Podani", names = c("Podani","discordance"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "1 - 2 * (a - b + c - d) / (n * (n - 1))", reference = "Podani, J. (1997). A measure of discordance for partially ranked data when presence/absence is also meaningful. Coenoses 12: 127--130.", description = "The Podany measure of discordance is defined on ranks with ties. In the formula, for two given objects x and y, n is the number of variables, a is is the number of pairs of variables ordered identically, b the number of pairs reversely ordered, c the number of pairs tied in both x and y (corresponding to either joint presence or absence), and d the number of all pairs of variables tied at least for one of the objects compared such that one, two, or thee scores are zero.") pr_chord <- function(x, y) sqrt(2 * (1 - crossprod(x, y) / sqrt(crossprod(x) * crossprod(y)))) pr_DB$set_entry(FUN = pr_chord, names = "Chord", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt(2 * (1 - xy / sqrt(xx * yy)))", reference = "Orloci, L. 1967. An agglomerative method for classification of plant communities. J. Ecol 55:193--206.", description = "The Chord distance.") pr_geodesic <- function(x, y) acos(crossprod(x, y) / sqrt(crossprod(x) * crossprod(y))) pr_DB$set_entry(FUN = pr_geodesic, names = "Geodesic", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "arccos(xy / sqrt(xx * yy))", reference = "Orloci, L. 1967. Data centering: a review and evaluation with reference to component analysis. Syst. Zool. 16:208--212.", description = "The geoedesic distance, i.e. the angle between x and y.") pr_whittaker <- function(x, y) sum(abs(x / sum(x) - y / sum(y))) / 2 pr_DB$set_entry(FUN = pr_whittaker, names = "Whittaker", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i |x_i / sum_i x - y_i / sum_i y| / 2", reference = "Whittaker, R.H. (1952) A study of summer foliage insect communities in the Great Smoky Mountains. Ecological Monographs 22, pp. 1--44.", description = "The Whittaker distance.") pr_hellinger <- function(x, y) sqrt(crossprod(sqrt(x / sum(x)) - sqrt(y / sum(y)))) pr_DB$set_entry(FUN = pr_hellinger, names = "Hellinger", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt(sum_i (sqrt(x_i / sum_i x) - sqrt(y_i / sum_i y)) ^ 2)", reference = "Rao, C.R. (1995) Use of Hellinger distance in graphical displays. In E.-M. Tiit, T. Kollo, & H. Niemi (Ed.): Multivariate statistics and matrices in statistics. Leiden (Netherland): Brill Academic Publisher. pp. 143--161.", description = "The Hellinger distance.") pr_fJaccard <- function(x, y) sum(pmin(x, y)) / sum(pmax(x, y)) pr_fJaccard_prefun <- function(x, y, pairwise, p, reg_entry) { if (any(x < 0 | x > 1)) stop("Valid range for fuzzy measure: 0 <= x <= 1") if (!is.null(y) && any(y < 0 | y > 1)) stop("Valid range for fuzzy measure: 0 <= y <= 1") if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_fJaccard" } list(x = 0 + x, y = if (!is.null(y)) 0 + y else NULL, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_fuzzy_dist", names = c("fJaccard", "fuzzy_Jaccard"), PREFUN = "pr_fJaccard_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sum_i (min{x_i, y_i} / max{x_i, y_i})", reference = "Miyamoto S. (1990). Fuzzy sets in information retrieval and cluster analysis, Kluwer Academic Publishers, Dordrecht.", description = "The fuzzy Jaccard dissimilarity (C implementation).") proxy/R/registry.R0000755000175100001440000003344013437463265013671 0ustar hornikusers################################### ### generic registry infrastructure ### IDEA: use lexical scope with nested functions to create an ### S3-"object" that exposes the data structure only through accessor functions. .FUNCall <- function(f) function(...) f(...) registry <- function(index_field = "names", entry_class = NULL, validity_FUN = NULL, registry_class = NULL, ignore_case = TRUE) { ### ATTRIBUTES ## repository DATA <- META <- list() ## permissions PERMISSIONS <- c(set_entries = TRUE, modify_entries = TRUE, delete_entries = TRUE, set_fields = TRUE) SEALED_FIELDS <- SEALED_ENTRIES <- character(0) ### METHODS (PRIVATE) ## helper functions .field_exists <- function(name) name %in% .get_field_names() .make_field <- function(default = NA, type = NA, is_mandatory = FALSE, is_modifiable = TRUE, validity_FUN = NULL) structure(list(type = type, default = default, is_mandatory = is_mandatory, is_modifiable = is_modifiable, validity_FUN = validity_FUN), class = "registry_field") .make_entry <- function(l) { ## sort l <- l[c(index_field, setdiff(.get_field_names(), index_field))] ## return object (possibly inheriting from entry_class) structure(l, class = c(entry_class, "registry_entry")) } .get_mandatory_fields <- function() names(META)[sapply(META, function(i) i$is_mandatory)] .get_field_defaults <- function() lapply(META, function(i) i$default) .get_entry_index <- function(name, stop_if_missing = TRUE) { ## returns the index of the first exact match (modulo case): index <- if (ignore_case) sapply(DATA, function(i) toupper(name) %in% toupper(i[[index_field]])) else sapply(DATA, function(i) name %in% i[[index_field]]) if (!any(index)) { if (stop_if_missing) stop(paste("Entry", dQuote(name), "not in registry.")) else return(NULL) } which(index)[1] } .check_value <- function(field_name, field, value) { ## Note we do not check NA entries because this may by set automatically if (!is.function(value) && !any(is.na(value))) { ## check class / list of alternatives, if any if (!any(is.na(field$type))) { ## check list of alternatives if (length(field$type) > 1) { if (!is.character(value) || !value %in% field$type) stop(paste("Possible values for", dQuote(field_name), "are:", paste(field$type, collapse = ", "))) ## check class } else if (!inherits(value, field$type)) { stop(paste("Field", dQuote(field_name), "does not inherit from class", field$type)) } } ## apply validity function, if any if (!is.null(field$validity_FUN)) do.call(field$validity_FUN, list(value)) } } .check_for_unknown_fields <- function(n) { ## check for fields not in repository missing <- !.field_exists(n) if (any(missing)) stop(paste("Field(s) not in repository:", paste(n[missing], collapse = ", "))) } ### METHODS (PUBLIC) ## field accessors .entry_exists <- function(name) if (ignore_case) toupper(name) %in% toupper(unlist(sapply(DATA, function(i) i[[index_field]]))) else name %in% unlist(sapply(DATA, function(i) i[[index_field]])) .get_field <- function(name) { if (!.field_exists(name)) stop(paste("Field", dQuote(name), "not in registry.")) META[[name]] } .get_fields <- function() META .get_field_names <- function() names(META) .set_field <- function(name, default = NA, type = NA, is_mandatory = FALSE, is_modifiable = TRUE, validity_FUN = NULL) { ## check permissions if (!PERMISSIONS["set_fields"]) stop("Setting of fields not allowed.") ## check for double entries if (.field_exists(name)) stop(paste("Field", dQuote(name), "already in registry.")) ## possibly, infer type from argment if (!any(is.na(type)) && !(is.character(type))) type <- class(type) ## check mandatory fields if (is_mandatory && !any(is.na(default))) stop("Mandatory fields should have no default.") ## create field entry field <- .make_field(type = type, default = default, is_mandatory = is_mandatory, validity_FUN = validity_FUN) ## check validity of default .check_value("default", field, default) ## add field to meta data META <<- c(META, list(field)) names(META)[length(META)] <<- name ## add (missing) fields to data entries DATA <<- lapply(DATA, function(i) {i[[name]] <- default; i}) } .n_of_entries <- function() length(DATA) ## entry accessors .set_entry <- function(...) { ## check permissions if (!PERMISSIONS["set_entries"]) stop("Setting of entries not allowed.") ## parameter handling l <- list(...) n <- names(l) .check_for_unknown_fields(n) ## check for mandatory fields mandatory_fields <- .get_mandatory_fields() missing_mandatory_fields <- !mandatory_fields %in% n if (any(missing_mandatory_fields)) stop(paste("The following fields are mandatory, but missing:", paste(mandatory_fields[missing_mandatory_fields], collapse = ", "))) ## check for double entries for (i in l[[index_field]]) if (.entry_exists(i)) stop(paste("Entry", dQuote(i), "already in registry.")) ## check defaults and set values, if needed field_defaults <- .get_field_defaults() default_fields <- names(field_defaults) missing_fields <- setdiff(default_fields, n) l[missing_fields] <- field_defaults[missing_fields] ## check field types, and apply field check function, if any. for (f in n) { meta <- .get_field(f) .check_value(f, .get_field(f), l[[f]]) } ## apply entry check function if (!is.null(validity_FUN)) do.call(validity_FUN, list(l)) ## add entry entry <- .make_entry(l) DATA <<- c(DATA, list(entry)) names(DATA)[length(DATA)] <<- l[[index_field]][1] } .get_entries <- function(names = NULL, pattern = NULL) { ## fix search if (!is.null(names)) { if (ignore_case) DATA[intersect(toupper(names), toupper(names(DATA)))] else DATA[intersect(names, names(DATA))] ## grep search } else if (!is.null(pattern)) { pattern_in_entry <- function(x) any(sapply(x, function(i) is.character(i) && length(grep(pattern, i) > 0))) DATA[sapply(DATA, pattern_in_entry)] ## else: return all entries } else DATA } .get_entry_names <- function() { if (length(DATA) < 1) character(0) else names(DATA) } .get_entry <- function(name, stop_if_missing = TRUE) { index <- .get_entry_index(name, stop_if_missing) if (is.null(index)) return(NULL) DATA[[index]] } .delete_entry <- function(name) { ## check permissions if (!PERMISSIONS["delete_entries"]) stop("Deletion of entries not allowed.") ## fetch entry index (this also checks if the entry exists) entry_index <- .get_entry_index(name) ## check sealed entries if (name %in% SEALED_ENTRIES) stop(paste("Deletion of entry", dQuote(name), "not allowed.")) ## delete it DATA[entry_index] <<- NULL } .modify_entry <- function(...) { ## check permissions if (!PERMISSIONS["modify_entries"]) stop("Modifying of entries not allowed.") ## parameter handling l <- list(...) n <- names(l) ## check for index field if (!index_field %in% n) stop(paste("Index field", dQuote(index_field), "missing.")) .check_for_unknown_fields(n) ## determine entry name name <- l[[index_field]][1] ## fetch entry index (this also checks if the entry exists) entry_index <- .get_entry_index(name) ## fetch entry entry <- DATA[[entry_index]] name <- entry[[index_field]][1] for (field in setdiff(n, index_field)) { ## check if field is modifiable field_entry <- .get_field(field) if (!field_entry$is_modifiable) stop(paste("Field", dQuote(field), "is not modifiable.")) ## check if entry and field are sealed if ((name %in% SEALED_ENTRIES) && (field %in% SEALED_FIELDS)) stop(paste("Modification of field", dQuote(field), "in entry", dQuote(name), "not allowed.")) ## check new value value <- l[[field]] .check_value(field, field_entry, value) ## modify entry locally entry[[field]] <- value } ## apply entry check function if (!is.null(validity_FUN)) do.call(validity_FUN, list(entry)) ## modify entry in registry DATA[entry_index] <<- list(entry) } ## get all entries for one field .get_field_entries <- function(field, unlist = TRUE) { if (!.field_exists(field)) stop(paste("Field", dQuote(field), "not in registry.")) ret <- lapply(DATA, function(i) i[[field]]) if (unlist) unlist(ret) else ret } ## permission getters/setters .get_permissions <- function() PERMISSIONS .restrict_permissions <- function(set_entries = TRUE, modify_entries = TRUE, delete_entries = TRUE, set_fields = TRUE) { PERMISSIONS["set_entries"] <<- PERMISSIONS["set_entries"] && set_entries PERMISSIONS["modify_entries"] <<- PERMISSIONS["modify_entries"] && modify_entries PERMISSIONS["delete_entries"] <<- PERMISSIONS["delete_entries"] && delete_entries PERMISSIONS["set_fields"] <<- PERMISSIONS["set_fields"] && set_fields } .seal_entries <- function() { SEALED_ENTRIES <<- .get_entry_names() SEALED_FIELDS <<- .get_field_names() } .get_sealed_entry_names<- function() SEALED_ENTRIES .get_sealed_field_names <- function() SEALED_FIELDS ### CONSTRUCTOR ## create index field .set_field(name = index_field, type = "character", is_mandatory = TRUE, is_modifiable = FALSE) ## return class structure(list(get_field = .FUNCall(.get_field), get_fields = .FUNCall(.get_fields), get_field_names = .FUNCall(.get_field_names), set_field = .FUNCall(.set_field), entry_exists = .FUNCall(.entry_exists), get_entry = .FUNCall(.get_entry), get_entries = .FUNCall(.get_entries), get_entry_names = .FUNCall(.get_entry_names), set_entry = .FUNCall(.set_entry), modify_entry = .FUNCall(.modify_entry), delete_entry = .FUNCall(.delete_entry), n_of_entries = .FUNCall(.n_of_entries), get_field_entries = .FUNCall(.get_field_entries), get_permissions = .FUNCall(.get_permissions), restrict_permissions = .FUNCall(.restrict_permissions), seal_entries = .FUNCall(.seal_entries), get_sealed_entry_names = .FUNCall(.get_sealed_entry_names), get_sealed_field_names = .FUNCall(.get_sealed_field_names) ), class = c(registry_class, "proxy_registry")) } "[[.proxy_registry" <- function(x, i) x$get_entry(i) length.proxy_registry <- function(x) x$n_of_entries() print.proxy_registry <- function(x, ...) { l <- x$n_of_entries() if (l < 1) writeLines(paste("An object of class", dQuote("registry"), "with no entry.")) else if (l == 1) writeLines(paste("An object of class", dQuote("registry"), "with one entry.")) else writeLines(paste("An object of class", dQuote("registry"), "with", l, "entries.")) } print.registry_field <- function(x, ...) writeLines(formatUL(x, label = names(x), ...)) print.registry_entry <- function(x, ...) { x <- .functions_to_characters(x) x[[1]] <- paste(x[[1]], collapse = ", ") writeLines(formatUL(x, label = names(x))) } summary.proxy_registry <- function(object, ...) as.data.frame(object, ...) as.data.frame.proxy_registry <- function(x, ...) do.call(rbind, lapply(x$get_entries(), function(entry) { entry <- .functions_to_characters(entry) data.frame(unclass(entry[-1]), ...) } ) ) .functions_to_characters <- function(x) { ## transform function entries into character strings funs <- sapply(x, inherits, "function") for (field in names(x)[funs]) x[[field]] <- paste(format(x[[field]]), collapse = "") x } proxy/R/seal.R0000755000175100001440000000007410642723444012733 0ustar hornikusers### seal entries of proximity database pr_DB$seal_entries() proxy/R/dist.R0000644000175100001440000003067714140004463012751 0ustar hornikusersdist <- function(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_similarities = TRUE, auto_convert_data_frames = TRUE) { ### PARAMETER HANDLING ## convenience hack to allow dist(x, "method") if ((is.function(y) || is.character(y)) && is.null(method)) { method <- y y <- NULL } ## transform data frame into matrix iff all columns are atomic and either numeric (integer or double) or logical or complex is.n_l_c <- function(x) all(sapply(x, is.numeric)) || all(sapply(x, is.logical)) || all(sapply(x, is.complex)) if (is.data.frame(x) && auto_convert_data_frames && is.n_l_c(x)) x <- as.matrix(x) if (is.data.frame(y) && !is.null(y) && auto_convert_data_frames && is.n_l_c(y)) y <- as.matrix(y) ## vector handling if (is.vector(x) && is.atomic(x)) x <- as.matrix(x) if (!is.null(y) && is.vector(y) && is.atomic(y)) y <- as.matrix(y) ## method lookup reg_entry <- NULL if (is.null(method)) method <- if (is.data.frame(x)) "Gower" else if (is.logical(x)) "Jaccard" else "Euclidean" if (!is.function(method)) reg_entry <- if (inherits(method, "proxy_registry_entry")) method else pr_DB$get_entry(method) ## some checks if (!is.data.frame(x) && !is.matrix(x) && !is.list(x)) stop("Can only handle data frames, vectors, matrices, and lists!") if ( is.data.frame(x) && !by_rows) stop("Cannot transpose mixed data frames") if (!is.null(y)) { if (is.data.frame(x) && !is.data.frame(y) || is.matrix(x) && !is.matrix(y) || is.list(x) && !is.list(y)) stop("x and y must be of same type.") if (is.matrix(x) && is.matrix(y) || is.data.frame(x) && is.data.frame(y)) if (by_rows && (ncol(x) != ncol(y))) stop("x and y must be conform in columns.") else if (!by_rows && (nrow(x) != nrow(y))) stop("x and y must be conform in rows.") } ### PREPROCESS params <- list(...) if (!is.null(reg_entry)) { if(is.function(reg_entry$PREFUN) || is.character(reg_entry$PREFUN)) { tmp <- do.call(reg_entry$PREFUN, c(list(x, y, pairwise, params, reg_entry))) if (!is.null(tmp)) { x <- tmp$x y <- tmp$y pairwise <- tmp$pairwise params <- tmp$p reg_entry <- tmp$reg_entry } } method <- reg_entry$FUN } ## helper function for calling the C-level loops .proxy_external <- function(CFUN, x, y) do.call(".External", c(list(CFUN, x, y, pairwise, if (!is.function(method)) get(method) else method), params ) ) result <- ### PASS-THROUGH-cases if (!is.null(reg_entry) && !reg_entry$loop) { if (!by_rows && !is.list(x)) { x <- t(x) if (!is.null(y)) y <- t(y) } if (reg_entry$C_FUN) { do.call(".Call", c(list(method), list(x), list(y), pairwise, params, list(PACKAGE = reg_entry$PACKAGE))) } else ## user functions need not implement pairwise do.call(method, c(list(x), list(y), params), envir = asNamespace(reg_entry$PACKAGE)) } else if (is.null(y)) { ### LOOP WORKHORSE for auto-proximities ## transpose data for column-wise loop if (!by_rows && !is.list(x)) x <- t(x) if (is.list(x) && !is.null(reg_entry) && reg_entry$abcd) x <- do.call("rbind", x) if (is.matrix(x) && !is.null(reg_entry) && reg_entry$abcd) ## binary matrix .proxy_external(R_apply_dist_binary_matrix, x != 0, NULL) else if (is.matrix(x)) ## real, integer matrix .proxy_external(R_apply_dist_matrix, x, NULL) else if (is.list(x) && !(is.data.frame(x) && by_rows)) ## list .proxy_external(R_apply_dist_list, x, NULL) else ## data frame (by rows) .proxy_external(R_apply_dist_data_frame, x, NULL) } else { ### LOOP WORKHORSE for cross-proximities ## transpose data for column-wise loop if (!by_rows && !is.list(x)) { x <- t(x) y <- t(y) } if (is.list(x) && !is.null(reg_entry) && reg_entry$abcd) { x <- do.call("rbind", x) y <- do.call("rbind", x) } if (is.matrix(x) && !is.null(reg_entry) && reg_entry$abcd) ## binary matrices .proxy_external(R_apply_dist_binary_matrix, x != 0, y != 0) else if (is.matrix(x)) ## real, integer matrices .proxy_external(R_apply_dist_matrix, x, y) else if (is.list(x) && !(is.data.frame(x) && by_rows)) ## lists .proxy_external(R_apply_dist_list, x, y) else ## data frames (by rows) .proxy_external(R_apply_dist_data_frame, x, y) } ### set col/rownames for cross-proximity-objects (if needed) if (is.matrix(result) && is.null(dimnames(result))) if (is.list(x) && !is.data.frame(x)) { rownames(result) <- names(x) colnames(result) <- names(y) } else if (by_rows) { rownames(result) <- rownames(x) colnames(result) <- rownames(y) } else { rownames(result) <- colnames(x) colnames(result) <- colnames(y) } ### POSTPROCESS if (!is.null(reg_entry)) { if (is.function(reg_entry$POSTFUN) || is.character(reg_entry$POSTFUN)) result <- do.call(reg_entry$POSTFUN, c(list(result, params))) if (!reg_entry$distance && !(is.logical(convert_similarities) && !convert_similarities)) { result <- if (is.function(convert_similarities) || is.character(convert_similarities)) do.call(convert_similarities, list(result)) else if (is.null(reg_entry$convert)) pr_simil2dist(result) else do.call(reg_entry$convert, list(result)) } method <- reg_entry$names[1] } ### RETURN DIST-OBJECT result <- if (is.matrix(result)) structure(result, class = "crossdist") else if (inherits(result, "dist")) structure(result, Diag = diag, Upper = upper) else structure(result, class = "pairdist") structure(result, method = if (is.character(method)) method else if (missing(method)) deparse(substitute(y)) else deparse(substitute(method)), call = match.call()) } simil <- function(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_distances = TRUE, auto_convert_data_frames = TRUE) { ## convenience to allow dists(x, "method") if ((is.function(y) || is.character(y)) && is.null(method)) { method <- y y <- NULL } if (is.null(method)) method <- if (is.data.frame(x)) "Gower" else if (is.logical(x)) "Jaccard" else "correlation" ret <- dist(x, y, method, ..., diag = diag, upper = upper, pairwise = pairwise, by_rows = by_rows, convert_similarities = FALSE, auto_convert_data_frames = auto_convert_data_frames) ## possibly convert to similarity reg_entry <- pr_DB$get_entry(attr(ret, "method"), stop_if_missing = FALSE) if (!is.null(reg_entry)) { if (reg_entry$distance && !(is.logical(convert_distances) && !convert_distances)) { ret <- if (is.function(convert_distances) || is.character(convert_distances)) do.call(convert_distances, list(ret)) else if (is.null(reg_entry$convert)) pr_dist2simil(ret) else do.call(reg_entry$convert, list(ret)) } } class(ret) <- unique(c(if (inherits(ret, "crossdist")) "crosssimil" else "simil", class(ret))) ret } # note that a simil object must always also be a dist # object for method dispatch as.simil <- function(x, FUN = NULL) { if (inherits(x, c("simil", "crosssimil"))) x else if (inherits(x, c("dist", "crossdist"))) { class(x) <- if (inherits(x, "dist")) c("simil", class(x)) else c("crosssimil", setdiff(class(x), "crossdist")) if (!is.null(FUN)) FUN(x) else { reg_entry <- NULL if (!is.null(attr(x, "method"))) reg_entry <- pr_DB$get_entry(attr(x, "method"), stop_if_missing = FALSE) if (!is.null(reg_entry) && !is.null(reg_entry$convert)) do.call(reg_entry$convert, list(x)) else pr_dist2simil(x) } } else structure(stats::as.dist(x), class = c("simil", "dist")) } as.dist <- function(x, FUN = NULL) { if (inherits(x, c("simil", "crosssimil"))) { class(x) <- if (inherits(x, "simil")) setdiff(class(x), "simil") else c("crossdist", setdiff(class(x), "crosssimil")) if (!is.null(FUN)) FUN(x) else { reg_entry <- NULL if (!is.null(attr(x, "method"))) reg_entry <- pr_DB$get_entry(attr(x, "method"), stop_if_missing = FALSE) if (!is.null(reg_entry) && !is.null(reg_entry$convert)) do.call(reg_entry$convert, list(x)) else pr_simil2dist(x) } } else if (inherits(x, c("dist", "crossdist"))) x else stats::as.dist(x) } ## as we do not know if the object is the result of some ## user-defined transformation the values of ## s(x,x) are not defined. ## we need to copy stats::as.matrix.dist() since the use of ::: is deprecated: as.matrix <- function(x, ...) base::as.matrix(x, ...) as_matrix_dist <- function (x, ...) { size <- attr(x, "Size") df <- matrix(0, size, size) df[row(df) > col(df)] <- x df <- df + t(df) labels <- attr(x, "Labels") dimnames(df) <- if (is.null(labels)) list(seq_len(size), seq_len(size)) else list(labels, labels) df } as.matrix.simil <- function(x, diag = NA, ...) { x <- as_matrix_dist(x) diag(x) <- diag x } ## however, it seems reasonable to assume that d(x,x)=0, ## which is also the default in stats. as.matrix.dist <- function(x, diag = 0, ...) { x <- as_matrix_dist(x) diag(x) <- diag x } print.crossdist <- print.crosssimil <- function (x, digits = getOption("digits"), justify = "none", right = TRUE, ...) { if (length(x) > 0) { m <- as.matrix(x) cf <- format(m, digits = digits, justify = justify) print(cf, quote = FALSE, right = right, ...) } else { cat(data.class(x), "(0)\n", sep = "") } invisible(x) } print.pairdist <- function(x, ...) { print(as.vector(x), ...) invisible(x) } print.simil <- function (x, diag = NULL, upper = NULL, digits = getOption("digits"), justify = "none", right = TRUE, ...) { if (length(x)) { if (is.null(diag)) diag <- if (is.null(a <- attr(x, "Diag"))) FALSE else a if (is.null(upper)) upper <- if (is.null(a <- attr(x, "Upper"))) FALSE else a m <- as.matrix(x) if (diag) diag(m) <- NA cf <- format(m, digits = digits, justify = justify) if (!upper) cf[row(cf) < col(cf)] <- "" if (!diag) cf[row(cf) == col(cf)] <- "" print(if (diag || upper) cf else cf[-1, -attr(x, "Size"), drop = FALSE], quote = FALSE, right = right, ...) } else { cat(data.class(x), "(0)\n", sep = "") } invisible(x) } pr_simil2dist <- function(x) 1 - abs(x) pr_dist2simil <- function(x) 1 / (1 + x) ### proxy/R/database.R0000755000175100001440000000561512015644530013553 0ustar hornikusers###################### ### proximity database ###################### ### check functions .function_or_character <- function(x) { if (!is.character(x) && !is.function(x)) stop("Need function or function name.") } .abcd_and_binary <- function(x) { if (x$abcd && (x$type != "binary")) stop(paste(dQuote("abcd"), "mode only available for binary measures.")) } ### create registry pr_DB <- registry(registry_class = "pr_DB", entry_class = "proxy_registry_entry", validity_FUN = ".abcd_and_binary") ## create fields pr_DB$set_field("FUN", is_mandatory = TRUE, validity_FUN = ".function_or_character") pr_DB$set_field("distance", type = "logical", default = TRUE) pr_DB$set_field("PREFUN", validity_FUN = ".function_or_character") pr_DB$set_field("POSTFUN", validity_FUN = ".function_or_character") pr_DB$set_field("convert", validity_FUN = ".function_or_character") pr_DB$set_field("type", type = c("binary", "nominal", "ordinal", "metric", "other"), default = "other") pr_DB$set_field("loop", type = "logical", default = TRUE) pr_DB$set_field("C_FUN", type = "logical", default = FALSE) pr_DB$set_field("PACKAGE", type = "character", default = "proxy") pr_DB$set_field("abcd", type = "logical", default = FALSE) pr_DB$set_field("formula", type = "character") pr_DB$set_field("reference", type = "character") pr_DB$set_field("description", type = "character") ### summary and print methods summary.pr_DB <- function(object, verbosity = c("short", "long"), ...) { if (length(object) < 1) return(object) verbosity <- match.arg(verbosity) object <- switch(verbosity, short = list(names = object$get_field_entries("names", unlist = FALSE), distance = object$get_field_entries("distance")), long = list(names = object$get_field_entries("names", unlist = FALSE), distance = object$get_field_entries("distance"), type = object$get_field_entries("type"), formula = object$get_field_entries("formula")) ) structure(object, class = "summary.pr_DB") } print.summary.pr_DB <- function(x, ...) { distance <- c("Similarity", "Distance")[x[[2]] + 1] if (length(x) > 2) x[[3]][is.na(x[[3]])] <- "other" for (i in unique(distance)) { ind <- which(distance == i) if (length(ind) > 0) { writeLines(paste("*", i, "measures:")) if (length(x) > 2) { for (k in ind) writeLines(paste(" ", paste(x[[1]][[k]], collapse = "/"), " (", x[[3]][k], ") = ", x[[4]][k], sep = "")) } else { tmp <- sort(sapply(x[[1]][ind], function(i) i[1])) writeLines(strwrap(paste(tmp, collapse = ", "))) } } writeLines("") } } proxy/MD50000644000175100001440000000324714250310004011754 0ustar hornikusers95ed06c3ebdc548d17ba29338d80b915 *DESCRIPTION e499bda510adfcd4965b84687f11f0e6 *NAMESPACE efa5c729c0744f2fdb2a9f970f982ef7 *R/database.R 1c71957d655b0e93bac59105b9686697 *R/dissimilarities.R 9c34cf415eace2a9c387eea5ba579064 *R/dist.R cdaabc587c5d0131b79932da2032f0c3 *R/registry.R 71b44c779ee8e97b5af57b7ed472d448 *R/seal.R b590aff80f0e19ea1210258ed4c15ab2 *R/similarities.R 93c8ed420c2081297b7bc2d15a910af4 *R/util.R 34d2a9328c715649558d9e4803e1137d *TODO 19f6f5ceb4d5bb78593dcd0ab5cd38f0 *build/vignette.rds cb7b10c1a035711f504ea564da39ad33 *inst/NEWS.Rd bcc756854674661ee0886c0c7d3b6c18 *inst/doc/overview.R ec24a75149c7ad35c68b86f731dd0d7b *inst/doc/overview.Rnw 0a5ac19d56e6a730d4bc9bc57fa0b0ae *inst/doc/overview.pdf 1970ec51a77e511d556f35f5caedf8d3 *man/dist.Rd f17e45dbaa4de733ae46e4b46fd9f503 *man/registry.Rd d9e34eea7ac668a0dc3798ec3e476e9f *man/rowSums.dist.Rd ea3fccbab55400cec52145ae1cd6a27e *src/apply.c b4a706d255fa2aeb729a733b122586af *src/arrayIndex.c 27a3ea78d0556f8e4bf560a214ec2aad *src/distance.c 5f5f6926767c934be5dee4d32e6e1e98 *src/dll.c bc14e759c2f7a7cab49e46df2199d053 *src/util.c 13acd6f763d4de9893a20051e4be759a *tests/apply.R 78e117321ea3d293909a0319222c3c90 *tests/apply.Rout.save 14e496e16fd3d9b13fa15009bfe815bf *tests/distance.R eae066df7e690fa00bbae6c835ff78dd *tests/distance.Rout.save 27b436b4f387638e82c91b50af2a6722 *tests/distcalls.R f9dae5cb7c379cbed1dd137ee6027851 *tests/distcalls.Rout.save 4f1d3a7ca18b4ac25cc3fc1b3b9707d1 *tests/registry.R bf59ffa18d3b12a25ccd66c382f531cf *tests/registry.Rout.save fd828b8c89d8e47753b9f79a2dc73957 *tests/util.R 7e8a4f5895205fb8770d34499c8796f6 *tests/util.Rout.save ec24a75149c7ad35c68b86f731dd0d7b *vignettes/overview.Rnw proxy/inst/0000755000175100001440000000000014250213102012414 5ustar hornikusersproxy/inst/doc/0000755000175100001440000000000014250213102013161 5ustar hornikusersproxy/inst/doc/overview.Rnw0000755000175100001440000000145711714440422015542 0ustar hornikusers\documentclass{article} \title{Proximity measures in the \texttt{proxy} package for {\sf R}} \author{David Meyer} %\VignetteIndexEntry{Proximity measures in the proxy package for R} %\VignetteDepends{proxy} %\VignetteKeywords{similarities,dissimilarities,distance} %\VignettePackage{proxy} <>= library(proxy) x <- summary(pr_DB, "long") FUN <- function(index) { for (i in which(index)) { writeLines(sprintf("Aliases: %s", paste(x$names[[i]], collapse = ", "))) writeLines(sprintf("Type : %s", x$type[i])) writeLines(sprintf("Formula: %s\n", x$formula[i])) } } @ \begin{document} \maketitle \section{Similarities} <>= FUN(x$distance == FALSE) @ \section{Dissimilarities} <>= FUN(x$distance == TRUE) @ \end{document} proxy/inst/doc/overview.R0000644000175100001440000000151214250213102015151 0ustar hornikusers### R code from vignette source 'overview.Rnw' ################################################### ### code chunk number 1: preliminaries ################################################### library(proxy) x <- summary(pr_DB, "long") FUN <- function(index) { for (i in which(index)) { writeLines(sprintf("Aliases: %s", paste(x$names[[i]], collapse = ", "))) writeLines(sprintf("Type : %s", x$type[i])) writeLines(sprintf("Formula: %s\n", x$formula[i])) } } ################################################### ### code chunk number 2: overview.Rnw:24-25 ################################################### FUN(x$distance == FALSE) ################################################### ### code chunk number 3: overview.Rnw:29-30 ################################################### FUN(x$distance == TRUE) proxy/inst/doc/overview.pdf0000755000175100001440000012013114250213102015523 0ustar hornikusers%PDF-1.5 % 3 0 obj << /Length 588 /Filter /FlateDecode >> stream xڵUn0+xЈ7Q̭E. nĄZRXs͛j2 A9Rx [Yx$}x>fZUu+F{xfXb$"G+CX0 'P=h(Hr1$Ec$9'-2@EQGi֖&u(dSDsYk[+ q!>K|ka8gJ7.dH68Y1tQS*]x>'TXsgrU6MUcupq`.89%9eV)g&|xy vRb|uɫoxLDs]4Z @dyQm{{֗9#x8{O]r3 nN\GoxeXLi;Ү^uUL:=^*LkuzVI6:?޽1_5߱/*s[>ӷe۪߸ȓ0]NէE: endstream endobj 15 0 obj << /Length 525 /Filter /FlateDecode >> stream xڵV]o0}ϯ(зe[&EV^,q`@k d>as?rν^ F{3;q]{- .lB#rc h4_a9w ΐ:N#⥣.S`U"ھ|)HV 39R=I6z=j`u6%CO ̦$77?> GiIetk&&?je@) Ub=co( RլhʄnjF)ԣvi,9Zcp1qxFFf664UZޯU u]/o(>jxX e{aثJMΪk-u?Ɵޠtbr7,a:Tv1&e;S4z!.?vs:/)j> stream xŖKo0I $@L޺êReuCĀZ/~ B bacy؟bF脁ѓs 8ޭƼ0Qin vMX:!rnTP7T`8]S/3UrȏˊUqʼnn;<dӺV4'"< U%eWr!@:y*cӡ{ dDUHҸQq_##suFw*e _AG"܊ѡ2DrD7k\%1ASCL&T-hՃmnK GfX$oDRcN a*mX6 t˟>pS' ) ߮<0zmW94@n %j )0yF$U"gYm|%-wz<2q7!wg7.0OU+wZQb篤 t 0lмBLٷh^k endstream endobj 21 0 obj << /Length 619 /Filter /FlateDecode >> stream xڵVK@+8bVn)TkQF lj|Q6F"1=ӿs 8x,4`i;4-Ղ[]@) !F|]7}X@3,`H$~1~yWt> stream xڵUK0W.$muJB+ȍ"!Hx6nSu%ɞxod%F`p<^ ,l#FȌODЂϧqA|j9'/17,DZw1;l2OvvEFXxkj"*ءrʢeڛ]G)JM$w8c񟐴ےL?KۖW bka{2+9e WF(T/3T#:KF=OLb~ݲ:s2 'k͝A?C*o4{/琓QR^mjmvrsO z 4H{ p .j(m_!> stream x3135R0P0bc3SSCB.# I$r9yr+Yp{E=}JJS ]  b<]0001; aX*6T?0'W N endstream endobj 26 0 obj << /Length 228 /Filter /FlateDecode >> stream xmαJ@o"0M^ป'pWSZY `eh>J+5E~;Yct_^iC-/+9u'Zst }{} ,, %s'l"aAZқMY'W Tc| endstream endobj 27 0 obj << /Length 235 /Filter /FlateDecode >> stream xu1N0ЉRX`3',ZiY$R AE GQr[0"OʌǓ/^ҟ+Vɾݭ%+yxb>F:iy-29Q EPE6fLV&b&e6fՎY (y/ifU _ cBԨM>y2_ |Ǜjh endstream endobj 28 0 obj << /Length 188 /Filter /FlateDecode >> stream xڕν @ + At-('𮶵kotrP?Q_ I+F!=ړ,o)$G$'KROt8oH&{$S^zVSBĢ iAf1h.p;`Z \2oߛy544` endstream endobj 29 0 obj << /Length 226 /Filter /FlateDecode >> stream xڕϿjAna s=b!j WJ!`R nGG8̜EH:_1;dySpnyΟ9)_6[d?9oR&[}";YL9#;e銊Һ„pQ*+j .+xs7xĕ\ }rR /:tKuNTc'ې'jiT2Dׂ+X endstream endobj 33 0 obj << /Length 137 /Filter /FlateDecode >> stream x%; 1F;]]hL!he!Vjih7eIY@5`NKnn;[.>Yʬz8nQuĥ>W#D*L"QCĶ5e" ьwO)B endstream endobj 34 0 obj << /Length 192 /Filter /FlateDecode >> stream xڅ1PDPl Ċ1D+ cmq@IA;WL0 v xlagnEt4'g'Ty!n{> stream xڅO; Pl {I*L!he!Vj)h-G,-$q̃T;LNuihuɗV'/2O4Ĭxq7 $$M | ,G\W{F9^ـ"J[|rY"ֱ4nT?pGrjݬc_e*[M* endstream endobj 36 0 obj << /Length 96 /Filter /FlateDecode >> stream x313T0P0T5W02S0PH1*2 (Bes≮=\ %E\N \. ц \. (\\\&Q# endstream endobj 37 0 obj << /Length 114 /Filter /FlateDecode >> stream x313T0P04W5W01T0PH1*22(Bs<=\ %E\N \. ц \. a`?r 5ez endstream endobj 38 0 obj << /Length 116 /Filter /FlateDecode >> stream x313T0P0V5W02W0PH1*22 (Bds<=\ %E\N \. ц \. c``pzrrlI endstream endobj 39 0 obj << /Length 175 /Filter /FlateDecode >> stream xڵ 0DQXK'2҆  * D h%##6HWYM0p sf؜Tz2{XKf1)Kd*rdGR/RA-%a|ݠЂV$QoeUG+O;a endstream endobj 40 0 obj << /Length 171 /Filter /FlateDecode >> stream xڵ 0EQ  miCp  (0 i~ϧ{~37 <& ~9JϓJu }s7&xܟnKœ(4^Jq^.JNQr?)F#PQ1H)3R;;J~.؆xC?ZOYb endstream endobj 41 0 obj << /Length 104 /Filter /FlateDecode >> stream x313T0P0UеP0T5RH1*26 (A$s<≠=}JJS ]  b<]'W * endstream endobj 42 0 obj << /Length 171 /Filter /FlateDecode >> stream x313T0P0S0W0P01VH1*26(%s< =\ %E\N @QhX.OXǏ?1 ɁԀԂ2} pzrrxS endstream endobj 43 0 obj << /Length 116 /Filter /FlateDecode >> stream x313T0P0V0S01T01QH1*26E-ɹ\N\ \@Q.}O_T.}gC.}hCX.O A-4v@ ù\=emH endstream endobj 44 0 obj << /Length 136 /Filter /FlateDecode >> stream x313T0P04U54R0 R M F0\.'O.pC.}BIQi*S!BA,???PP'W ,5 endstream endobj 45 0 obj << /Length 99 /Filter /FlateDecode >> stream x313T0P04F )\\@$lIr p{IO_T.}g E!'EA0XAՓ+ ; endstream endobj 46 0 obj << /Length 157 /Filter /FlateDecode >> stream x313T0P0U5W0T0PH1*26 (Bds<=\ %E\N \. ц \. @#HD؁:Q'@&> f0d82>3 df Dpzrr@: endstream endobj 47 0 obj << /Length 107 /Filter /FlateDecode >> stream x313T0P04F f )\\@ IrW04 s{*r;8+E]zb<]:\={-= endstream endobj 48 0 obj << /Length 103 /Filter /FlateDecode >> stream x313T0P0W04S06W02TH1*2 (B$s<,=L=}JJS ]  b<]0 szrr$~ endstream endobj 49 0 obj << /Length 184 /Filter /FlateDecode >> stream xm=` .߁1D'㤎]ċ8p n #~$(}L> stream x}0K:#pO`i1NI4 Kd0FMj\ijx@½%\PPGL2P[2;|=7P~K<Ls 9y|9#l K#vӜ_[ZCN _CF,a8[NXTQ endstream endobj 51 0 obj << /Length 218 /Filter /FlateDecode >> stream xڝ1N@4QY AT (Ar 3AzWJ_kN|y9H/vI'Zun8-)\ؙBwoVWg)6r}Gݚ3J~ ZTMa.)- o̤/`tR27V֯ifhh`+-RN]dvg9 endstream endobj 52 0 obj << /Length 183 /Filter /FlateDecode >> stream x313T0P0bCSCCB.c I$r9yr+[p{E=}JJS|hCX.OD|?b0 AD}&> f0H0b!On%rv?s?>  `szrrǁG endstream endobj 53 0 obj << /Length 147 /Filter /FlateDecode >> stream x313T0P0b#SCCB.c HrW0r{*r;8+. ц \.    `|$lthvb)،6 Q .WO@.̌r endstream endobj 54 0 obj << /Length 145 /Filter /FlateDecode >> stream x313T0P0bCSCCB.c I$r9yr+[p{E=}JJS|hCX.OH" $`@CLmQD !( ,x endstream endobj 55 0 obj << /Length 227 /Filter /FlateDecode >> stream xڍ=N@\4PY AT(PR$ގk 7eUI"Q|{;5袥aC]8> stream x313T0P0b#SCCB.c HrW0r{*r;8+. ц \. ?c4 N%'W  endstream endobj 57 0 obj << /Length 156 /Filter /FlateDecode >> stream x313T0P0U5T0҆ )\\&@A "ɥ`l¥U()*Mw pV0wQ6T0tQ``HX`'$@DD?`AH?` @OjhPՓ+ UX endstream endobj 58 0 obj << /Length 218 /Filter /FlateDecode >> stream xE=n@E.,MvNm M,#EPR%)SB9QPr.]lȢOLt&c&FRf1K~|U.k9s endstream endobj 59 0 obj << /Length 123 /Filter /FlateDecode >> stream x313T0P0bCSCCB.cs I$r9yr+s{E=}JJS|hCX.OLŘN|? ?*f endstream endobj 60 0 obj << /Length 177 /Filter /FlateDecode >> stream x313T0P0b#SCCB.c HrW0r{*r;8+. ц \.  B`W${1y 01h͇q|Fa  l?`!'W , endstream endobj 61 0 obj << /Length 194 /Filter /FlateDecode >> stream xU-@%&c 迨 P$u[GEev K1h8&nL؃-;CFXA_>pi ?!&+R"c(ɉ(N+ƵGSroW\"Ϡ+tIߣmśh5| dXB]/qs| endstream endobj 62 0 obj << /Length 170 /Filter /FlateDecode >> stream xŐ1 @ERxt)R-n!he!VB9EqW7seϨxAƘxң3U5ݮr 쀾"h `,T'uID x/H 9 Zpqol endstream endobj 63 0 obj << /Length 174 /Filter /FlateDecode >> stream x313T0P0bSCCB.cs I$r9yr+s{E=}JJS|hCX.O0"370`H؃@`?#^^Q`Cƃ-Y  f $700 F"b\\\wN endstream endobj 64 0 obj << /Length 197 /Filter /FlateDecode >> stream xڕС0jrf{::"#a e0XvtmCOh)T^ aLiOvG ֤FscT,r0ʖSiNfEN`Y9Q3pqNN3O0n ZJ4&}5ty+A -ؼ+ԀW2>z endstream endobj 65 0 obj << /Length 236 /Filter /FlateDecode >> stream xu1N@ E"a|$H" * DH$*\!G2HQwmT 娔DJsՠg?x#Um<>r\Iq+wn˜24wC0MLNLtA 9a=tC68yF̛aO2/a<&E>oxv endstream endobj 66 0 obj << /Length 124 /Filter /FlateDecode >> stream x313T0P0b#SCCB.c HrW0r{*r;8+. ц \. @†H0 z(QՓ+ +T endstream endobj 67 0 obj << /Length 197 /Filter /FlateDecode >> stream xڍϯ P#)>tœ &5m.b_CYN wzto,NvE69Wh .-rZeD/@sL@56Mo%n} :}v%$@FTiXz[V!zyM-+_X=Ey>J3CN.{K endstream endobj 68 0 obj << /Length 192 /Filter /FlateDecode >> stream xڭ= @ )"U F0Xmb aҔ)®p)6 GqBQ@O[SQ6{ t&NExޡ9OA q@#~8 7ŝm'ch/m:^[ endstream endobj 69 0 obj << /Length 191 /Filter /FlateDecode >> stream xm= @ x Ղ?` A+ RK E[)S,;h%Xfh< }:ex\T:8^pVQ>EmqF;)C}FE$ sXBט^Hȃ@?|bezYETZ_q-`R!a~K<.Kj/\ endstream endobj 70 0 obj << /Length 187 /Filter /FlateDecode >> stream xڝ= @g"#Xraˀ!N;GYg!BR@[]/w%ܔ|q&?,Lƹ+x"ҡ@yRx -0遍~*?umֽr!0e] EӐ`%Ж*sz endstream endobj 71 0 obj << /Length 182 /Filter /FlateDecode >> stream xڍ1 @EIk9 n!he!Vjihh%GL2Φօ}g?ofǜlS>'t#k5?;2{Zd܆L]rBC\"iJzD=[5/jLAOQ~ߏ@B_Zh4J5Ϋ^RMuZ9uEJ endstream endobj 72 0 obj << /Length 193 /Filter /FlateDecode >> stream xڕα@ .<} L &`qRG;[pqᾤ 5)+H+9s<^&|XLפ*L,r0S⺡MNMC $z11wx!"><Zi&N?>cH RaH'c ˁ:ѴmO, YK endstream endobj 73 0 obj << /Length 201 /Filter /FlateDecode >> stream xmPE4K BBrmM>}}V́;ܹiԥS=T'u9&a+NFF⻥OK+ VZ[( f#2;܃J>PDCv@Z }•cC 7'* 4u.7mp b2rcZI_ endstream endobj 74 0 obj << /Length 154 /Filter /FlateDecode >> stream x313T0P0asSCCB.c1s<=\ %E\N @BA,@Az H?*;&p4Aka[~ `1.WO@.^ endstream endobj 75 0 obj << /Length 253 /Filter /FlateDecode >> stream x}J@#E`}!k.p` A+ RK E#U(y[,gǰzqꜟJz`;볟 Z.(wk~x|ws%{/xv4lnfxYDdItSn\#7@efd=`El6X4jB*`f}E_h0bj1SL̀,x>v*!*:MƢ:?-y%ۧF@-7> endstream endobj 76 0 obj << /Length 161 /Filter /FlateDecode >> stream x313T0P0bcSCCB.1s<L =\ %E\N @B4Pe,B @d ?  B~oAd $?HzI8'W z endstream endobj 77 0 obj << /Length 132 /Filter /FlateDecode >> stream x313T0P0bcKS#CB.cC I$r9yr+r{E=}JJS. @-\.  @x@@?C1;}pA|.WO@.O) endstream endobj 78 0 obj << /Length 169 /Filter /FlateDecode >> stream x͏= @_#d.͟ B Fp !VbnxK q\`eW񊉣~2c!GOj .mO1dXV|-M -X endstream endobj 79 0 obj << /Length 198 /Filter /FlateDecode >> stream xڝ;@%$p.H)L0VjiVW(x[_~0E_cƃ=2b4gA ΄Sp)-8lsQy endstream endobj 80 0 obj << /Length 115 /Filter /FlateDecode >> stream x313T0P0b ebUel䃹 \.'O.pc.}(BIQi*Sm`Pz<7,{\W endstream endobj 81 0 obj << /Length 171 /Filter /FlateDecode >> stream xڽ= @[&G\@7!Q1#X^,7[n8ȃW3r9Al&]'-\,cx܎` s0 n ==Cbq1 SeKvI'mr/)T8R`5zf endstream endobj 82 0 obj << /Length 155 /Filter /FlateDecode >> stream x313T0P0bcc3CB.1s<L =\ %E\N @QhX.O$$PD2`$ȃ@H&?:7 q.WO@.ll endstream endobj 83 0 obj << /Length 183 /Filter /FlateDecode >> stream x}=@XLvNBLH0XF[٣Q8ab^2}KJ)*%Kw4 +@@)juE]VQzB[_P :9o.A@9(dq%7@'a/=ߵG.^Tyh p A!\\[>P: endstream endobj 84 0 obj << /Length 200 /Filter /FlateDecode >> stream xڥ= @g fI"SZYZZ(ښͣ[.(wS|7q4HRYs_8 LWCNv?$#(%p:lHj&5pGٌs V,S*7;(&A]t, -GT@8=F> $_ȥF<5ޯ endstream endobj 85 0 obj << /Length 211 /Filter /FlateDecode >> stream xڭ= @ 4 وVVb&7J{ Lig Z 6_B޼q;QH1.#ܡ$ )ѯO-3 # ƒcM?n0O$!Wɾb|31P_6rilxz+=Տ>jO=]quBVŴ~[)D\|kse8'vG endstream endobj 86 0 obj << /Length 158 /Filter /FlateDecode >> stream xڭ1 @ПJuj!Fp A+ RKAEh9JAqc![̃I`4-ØԈmjw쎜{Vky\Y\/|9êe_Hx+5C8#$RC\B"xo<Iw endstream endobj 87 0 obj << /Length 185 /Filter /FlateDecode >> stream xM1 @4!s7q5@T0XErr,,2ԎgDM&rv=pr^ًYMyaoY!RrGB7 }KD#"eZSW!("PB Ca}96A=> stream x313T0P0bc 3CB.cS I$r9yr+r{E=}JJS ]  b<] @AH2`h AA~[@ Lx:B endstream endobj 89 0 obj << /Length 148 /Filter /FlateDecode >> stream x313T0P0bcc3CB.1s<L =\ %E\N @QhX.O` $0()D? d=H2cģd> endstream endobj 90 0 obj << /Length 186 /Filter /FlateDecode >> stream x5= 0W:oN`B`A'qRGE7^̭ ء4ؔ? ,&Q@>0[}pb*Q)QzܟvI>>yG:J^]S |-,ZHZX:^<r[C准qzb&gaQ$L endstream endobj 91 0 obj << /Length 174 /Filter /FlateDecode >> stream x313T0P0bcc3CB.1s<L =\ %E\N @QhX.O `?aC00~ @2?Dv`N2~+ߎ #ȏߏ`` ?G#g``?A6 H@RՓ+ ɝm endstream endobj 92 0 obj << /Length 202 /Filter /FlateDecode >> stream xE; PEoH!LUBBBN!۲t @!L@,a̻{ې lfOÄܒZrɌOp>ܘW!kJ/LnRQ;H(+p{h/ O.ok> 44W&F&R$}xY& endstream endobj 93 0 obj << /Length 237 /Filter /FlateDecode >> stream xEαj@ dz)CB=ҩCɔdnvj:t&=$%p!:d-"zX!ZnhyxDQd}LKႲ)ֳ[{vȭ+OPy5 @U-G[;z[*lB;v\ɼHer;SHR Z88 ~Ka{ endstream endobj 94 0 obj << /Length 176 /Filter /FlateDecode >> stream x}1 P S2Y<9*BV N⤎G(Ϥc|?!?'S3>gt#͔+^wr~ÏB.9#W!H"Px+"B I / >i`$f_$hj(D{{-ӎ~b endstream endobj 95 0 obj << /Length 203 /Filter /FlateDecode >> stream xڝ= @_L#8MLRL!he!Vjih'({!q-6߲`}t!'<8 91 ũ piNfqJf)c2ot=̜w{@^m W÷x: dTLdO_'X`*w]!WҢqz9KU" }}d endstream endobj 96 0 obj << /Length 141 /Filter /FlateDecode >> stream x313T0Pac S#CB.# I$r9yr+Yp{E=}JJS ]  b<] X큸7001;j?0FJ endstream endobj 97 0 obj << /Length 222 /Filter /FlateDecode >> stream xe1N1E*i| .-V Ab $(UAݣ(>B,?kWEwk.i;O%/$=iI^>$nF6x0ڄʬ ͎X⌾T~fGvlgOȠ<|HTGǂ+ˇD5WTL3*=2,<8h endstream endobj 98 0 obj << /Length 181 /Filter /FlateDecode >> stream xڕ=@!$p. b&ZY+h pJLh$%^5Y (xTHN)74 U[QcL uMĄB9ƛG3a(if M( /#`cV2OZ˿Z;5t endstream endobj 99 0 obj << /Length 207 /Filter /FlateDecode >> stream xڥ= @4{t&)!BBB,xxqFE惝}ov)ZRGk;Sʱڬ)Nюe6aܠOi(Zb>$\Cǹ.5Tº)7 P \)'ߘ'-,e$9ґ i `AY ֚ G9-c endstream endobj 103 0 obj << /Length 258 /Filter /FlateDecode >> stream x1n0` x'b R"5SS۱Cd(9BFcWGRZ}l_Y1S#=e}EeEzYNzm6|<>I/O^捪ko?n>CK(I֪ov^سs`'rVr\w I˼ދ/np=g?;ؗ= 13rً E7Z1ӌk kmgj.=WMs endstream endobj 104 0 obj << /Length 346 /Filter /FlateDecode >> stream x}ѱJ@?lv_@p] !p` A+ RK E;!hM7HqfwO`vv23)Vf0WI%X8=Uk3UqaUASSbmn*Sުvm| 82"7@б, }8$tHIR2>JJ =MT;4[6R׳ā~D}~k.:6ʃHϐDJwk81ۇ=Isz6WBJI7l:ahJ7Cަ85,φkVq< /XYd|vRJJ}I endstream endobj 105 0 obj << /Length 270 /Filter /FlateDecode >> stream xڕJ@'LsL 'BB> stream xڅJ1g"0M!`Dy[ZYZZ(ںy}<•aǙP1|?IO :1H=>cTPc;Ocw!^_[^ʙ;V8?dmgPj\Rq :dĄ* |Vbn;gE d1o( ؁ahDBc!D[o1En %in6N:\Z` æ]H_I<?y뭜 endstream endobj 107 0 obj << /Length 138 /Filter /FlateDecode >> stream x3635Q0Pacc CB.# I$r9yr+Yp{E=}JJS ]  b<]``0f+ɃԂ 0a@\\\٥; endstream endobj 108 0 obj << /Length 107 /Filter /FlateDecode >> stream x3635Q0Pac cCB.#K I$r9yr+Yr{E=}JJS ]  b<]0a\= endstream endobj 109 0 obj << /Length 232 /Filter /FlateDecode >> stream xҽjA W#>WZL+vrp!ET+ -vXqt;';됱j-->xsiNY-gOّy+#CYEI O$Rx%4DJʤn ׮UH@Y$߸Np⧤D@(Ax^ 9Eۄip xviC endstream endobj 110 0 obj << /Length 169 /Filter /FlateDecode >> stream x;0 t#' VbTD$02`nQzT dj20XY陞c+4xRps?aq@iA W<ix=   E^6ɱC:_:Wѫ}O_ /h m Ij^ endstream endobj 111 0 obj << /Length 259 /Filter /FlateDecode >> stream x]1N@4;ۊB$\ Q%ڬ\vY)yTk.拊57 UIJ/Kn6O\k*ybx[~|nXp8HDF#々~7'QȔ^;LKZ+45qj@.dtv!"ieh֔j]dV絳Su ?hgcfKxhGZ endstream endobj 112 0 obj << /Length 186 /Filter /FlateDecode >> stream x3534S0P0R5T01Q07SH1*21 (Cds<L =\ %E\N @QhX.OON2bH$;&=A$3?8HAN7PJ`$H `( E` qzrr:p endstream endobj 113 0 obj << /Length 137 /Filter /FlateDecode >> stream x3337W0P04  )\\&f  ,ɥ`bƥU()*Mw pV0wQ6T0tQ```c;0D0I~0Y"I ?&D(I"\=VI endstream endobj 114 0 obj << /Length 301 /Filter /FlateDecode >> stream x}MJ0)YؖG_]x>.]W҅h=Je? گiftߟ ChÞ6 s/\knCs%ux^ߟ\s>k o@B,D'DdZ"-,-B/63"x甙k p7q|$pF暿 dL@AvZHFӬYM5k|,ZdIeb4j`Mg!@Tt`[Bͻ.A8Ew̕bԊW'bt7}t endstream endobj 118 0 obj << /Length 136 /Filter /FlateDecode >> stream x323P0PP5T02P04PH1*24(YBs< =\ %E\N @QhX.O9   fv6> $'W  ' endstream endobj 119 0 obj << /Length 220 /Filter /FlateDecode >> stream xڽ=0$N`!!U'D::htq@ZmIjlB-$CϐOj^gHs`[1e ,_z?Kse0C (eml dE|QbM*mhVK;-Fi,IUAmluΧl.CNZ=xں%giz@6 7 endstream endobj 120 0 obj << /Length 171 /Filter /FlateDecode >> stream x1 @ [~/1FJL!he!Vjuh%GL7pWjRVsȣ BRJœϲ?SVp\ؚdq$fyQ3ƴ_@ x6QjykaD D~:Vht%7Tm endstream endobj 121 0 obj << /Length 258 /Filter /FlateDecode >> stream x}J1 ] {-(tdibVp> stream xuϱJAba yh+RPK E;1 tƽpS|?;?xžjs3TC=-r+SrgkkrKyrM͒a{ծlB-`a:`u)xuwGW2&e˯ɦnh huaǨk} [ bԪob"EzONoɌla endstream endobj 123 0 obj << /Length 212 /Filter /FlateDecode >> stream xڽϱ0$7 x/$N$ &:9'utf,ƣ Fp $K8q b~bNe/DF4AFGi[?2%72byg6Nh:]hBQ֩L)϶?$nId[XmFiǞzՊuA63` ^j endstream endobj 124 0 obj << /Length 210 /Filter /FlateDecode >> stream xu1j0g<7 41'z(S$ MHXGQ|JW\(T 7uN3uki1}.Gq%Cf&u#U])Yϧz\R׹fi WOp_PI! I@*#f%#~,K{ǏT#,ΰq`(nYsLޖF^V2 endstream endobj 125 0 obj << /Length 125 /Filter /FlateDecode >> stream x323P0P0b#S3sCB.#C I$r9yr+r{E=}JJS. @-\. ? :  .WO@.P endstream endobj 126 0 obj << /Length 159 /Filter /FlateDecode >> stream x3534W0P0bSCCB. HrW01r{*r;8+r(D*ry(0a@R` `$@z ɀ a/ m?C&\=?qjS endstream endobj 127 0 obj << /Length 144 /Filter /FlateDecode >> stream x36׳4R0P0a3CB.c HrW06r{*r;8+r(D*ry(0`?l(g?6g u@lC{ pP endstream endobj 128 0 obj << /Length 162 /Filter /FlateDecode >> stream x1 @ᷤL fqC@Vb--+'Gˑ<@Ⱥ!X l3pjZ>DŽm:L#c^[z?.6 6KNJV- -reByDz 7U}`(D,uxI0nҷWR hhKob endstream endobj 129 0 obj << /Length 202 /Filter /FlateDecode >> stream x]= @Y6sݬ+0Z *ZGQr!n5|ś7ȈBR[^0$)?G19]/bLւ :c:k{-Ŭ`m88u t&p2 lB̘Ϙ> stream xeпJ@o \`^By]  @-G̣R^w]9 Opj8>xPS5ZOLIppu%?^^qDzŷ;JW\ׅˡ~ lr&Vg{'´N2;s8Gvn=ЪQob]pл ~^8:g007~ʞJT Ͼ4sM^!yJ[X' endstream endobj 131 0 obj << /Length 207 /Filter /FlateDecode >> stream xڽ P FҡмVn?`A'qRGE7f}>BŚނ*3$|9VuQۀ}+5͞1%kTڤ|18Ux*%V738 \A&rOP deyܿ>X ?c\%#'q(IfNĴ) endstream endobj 132 0 obj << /Length 259 /Filter /FlateDecode >> stream x]J@Of!"." E0pA.Z v |˝gH0??pNNmnҮwYUϹ勧7wk"nssa q[{_AꭅBaD4%;>#p{%*édlW]HO˷df 3ÂױtK҇FoMfl=o,"E"pLΉ~WhFF*4& !3DWZnvj endstream endobj 133 0 obj << /Length 285 /Filter /FlateDecode >> stream xmN0Fȃ%/~/IQ:F*E"02@bH͏GȘ!s[uY:9˅/|.|U_ݔOZ~̺1/ 2l~||}&ǹ/L'bFzNEؠtX !v$tS2WSK8Zdef-UwN: VBDXMvU=+OD6($8ㇸb+N==BZ!r5B<$gVZ}F=sӘ{~ endstream endobj 137 0 obj << /Length 300 /Filter /FlateDecode >> stream xڍҿN0[xH>?Ltr0N#0v w`bz׫ZS}⭄Of˓8?骆oCﯟw7K]@{ΏI FJn^: -ܰBu#NdD^@䟷ѷ's: Eptk17RDcD0&D̘8E#D]2+&D> stream xڭ1N0 :T#6*-TbFrw;4 7Ec0gH-tjhh8gowr> stream xұNPcܥp (DL'ZLtr0A3<rM22&ФMsJ.V+^p钳_n%V~z~3ď-M|uwj5'&S‹)6L5Q.u$-Υ\RElpl^H:"h.jM35vZّEQG6_Щmpvnj~3obGjr r yUȯ9$j}_~`6!v}[guLp&>ZU(f}}ݝp9sS `^ endstream endobj 143 0 obj << /Length 202 /Filter /FlateDecode >> stream xб 0+[ /0)BKB`A'qRGE)JKKq6!K%bY`{`1`XsCb#N}6k3l!"W0@> stream x}N0 ]u<SbFͣQ:vb$UM9'a;{Eli:=;>}^w~pc;sgw/z> stream x};N0Y&G\JP*K" * D pre(Ɵhe$ ;vlg.xccw/x]kgk[Zz~ATصnUT+lDu'1Q06ؑ!?v d;;)vjQ04@X,ra?Ýgk>THy(E045A׸Y@לbpѧ- Ee ? BR䌕Œ"gcNQ2R RmrK2QIX+!5E*uQj¨)Kf"JSfbRN׃`+ϐ endstream endobj 149 0 obj << /Length 233 /Filter /FlateDecode >> stream xڳ437R0PaSK CB.s3 I$r9yr+q{E=}JJS ]  b<]>@?7@Y - DBX`,v8bƪ@3nfd_b?̰⇇<`,x> stream xڕѽN0> stream xڵJ@%4y˼nn p` A+ J--PB\_SE;%_tB=ܵlkouLn}{ ?T\n0`Bh§"( v3,rV (R0(Z1̾?^3A RW^SML j3)0}1F3f liX6e*yX i}lM󣫖 S-zY endstream endobj 152 0 obj << /Length 267 /Filter /FlateDecode >> stream xڝJ1'lq0޼fpVb]hy}-86L /;q5%QwFO-kHfr;r +ZoyaC 2i寙5z>%k<&r,`vd+q3ߒ1^+ \oxE<@G*q/|Aoٸ=,8U(`ش fA-pڟڤPj"{mI倷YR endstream endobj 153 0 obj << /Length 182 /Filter /FlateDecode >> stream x3134W0P0b3CCCB.#JrW01 s{*r;8+. ц \. @? $|@H3?D2I?`dT!?0ȁ ```! i? P$\= endstream endobj 154 0 obj << /Length 351 /Filter /FlateDecode >> stream xڭJ0ǧȥº=z =umr!4LRuDg^W4;(M}h-ԣKCQ\jժԥ*NѮ̼<ޫbu~lX)U6_GzahB t ]2G6Da)hrcfEA1-?pλճ I}҈6ĥPgOn ܘ'+tc036u! 蒡AM"9%} |H=X9ZHv]ϽmE=LQVgq)ϜRT7D]n cƒ|M'b<%NZu>v endstream endobj 155 0 obj << /Length 219 /Filter /FlateDecode >> stream x37ѳT0P0bsCCCB.33JrW03 s{*r;8+r(D*ry(00`P"0C=~d3@@C P?P 8xq83qe0w`0H+p32> f qՓ+ P endstream endobj 156 0 obj << /Length 142 /Filter /FlateDecode >> stream x3631R0P0bcCKSCB.#1s<L=\ %E\N \. ц \.  30oAr 5 T @;af f!`` ȘՓ+ > stream x=J@ )2'p2Dl +BB\K E;qy^a2E33EdȼҥOumYꭥA +]Ȝc2͹~z|#8іF_[]PI%ae,*=c<<6F< ӉY+ _ ^Lubފq,?vMectJAqO8:G}- ȘKH~cD='0t[g7׏iC endstream endobj 158 0 obj << /Length 252 /Filter /FlateDecode >> stream xұJ1;|Iej`]),APKAEn"v[ |]\k܄[vGXN n2rב)MZ/W4mɟ˟1cɷ'E37.\P;s0 ]*\T3&03vrHM%.,^{aK u`m)4`BO5䀳"mDV_—B.fY/ë/AG-!A B endstream endobj 159 0 obj << /Length 207 /Filter /FlateDecode >> stream xѡ0[*#pO@@ %0&H@! $h%#L"uDKzz٢"\1CtAݓSi֫u{СuB U|0ۀؖB%/Q@Px_Qv؁ʲ#rO ^7\gpx'A~^ɼP/nC|U endstream endobj 160 0 obj << /Length 249 /Filter /FlateDecode >> stream xڭN@ }K!~5*1#ܣQ3T9l Iɾ5TUEš^+:pP3/F *-=UT>cKxii$@v#W@!'=r48 E\)GC B1:6b:wZK??"Xi=1wfbpY4?]e[t~x# endstream endobj 161 0 obj << /Length 288 /Filter /FlateDecode >> stream xѱN0Ы2DHmNJȀS22`%4*1Cg[!uBbbt:Ftr6IF9s|bli%cLl^_0\tSv PiYY0٣-$Fi nQC$lrڢWF$\Ea}!~"bǠ?qQu{3}>t^ uCaΟ jeG)AmJIeŐ[W.翢j؄7,?ne endstream endobj 162 0 obj << /Length 185 /Filter /FlateDecode >> stream x? P ,dМVt* ίGQzN:xȗ@ iDrj* CDJbCbqNjILjn߮#r)o̙-S/XSeFԕ+^+k۪d%A3vX}X~ö"7iӊ^Ds. endstream endobj 163 0 obj << /Length 281 /Filter /FlateDecode >> stream xu1N0G\o$"-D $(PR[mr⛐#Lvq v '33n"O'5sj<=x/5j֝){S^˵)x|1jSn衦t8z[d yDbDΰt=ZbM΢yqPje^5X*>YY:#BIj!MlG-ƨH]$?r>Pc6A٠~I"vfD7(0l@/]3wׄ endstream endobj 164 0 obj << /Length 191 /Filter /FlateDecode >> stream x3531T0P0R5T01UPH1*21 (XXBds<L=\ %E\N \. @b<] @>dF"ّH~$RLڃz0D2I@D1aL``n@'03H~`c1(l@A(8\=~@ endstream endobj 165 0 obj << /Length 203 /Filter /FlateDecode >> stream xҿAr $7/eQII\!R Q,'s0eQ"ܟ3?(%V U Вn(6Y4n+|א<>ȭh\ E&tj8 endstream endobj 166 0 obj << /Length 319 /Filter /FlateDecode >> stream x}?N0/`Cs' BmHJȀCH@l\+7 9 U6c#%glʮlfeח2foofךѮQSwՔk[roɴ{ q 4M@s`d<܃oh^53¼x@=tqeF3`0b)(jA>(Np5g PK>' o4s?u'4v)Jk(VλEӗM8"<¥1fdc,,@Y" -ji_[iOHw.zHhA~ ?4sxkAܴX endstream endobj 167 0 obj << /Length 335 /Filter /FlateDecode >> stream x}K0Wz(@œ`<'(LQo/w(/h3&ORH}Ev4d|ѫ7X%7Β~|dqwqOXZk z Ūe & 'NMpL7Vh2zeBC(,JX:6t%`֩FWC`ڃv1Kǚ ֒!KtQN6G%A>"10:@|yefx~x }P@QS@C))NIG%SԦHS ]W%O endstream endobj 172 0 obj << /Producer (pdfTeX-1.40.22) /Creator (TeX) /CreationDate (D:20220608233601+02'00') /ModDate (D:20220608233601+02'00') /Trapped /False /PTEX.Fullbanner (This is pdfTeX, Version 3.141592653-2.6-1.40.22 (TeX Live 2022/dev/Debian) kpathsea version 6.3.4/dev) >> endobj 11 0 obj << /Type /ObjStm /N 40 /First 317 /Length 1899 /Filter /FlateDecode >> stream xY[S7~ϯ?wK+TTxXCT6&HK.fUr3|4)bJh,iƇ!4q 'iEN[D#ڐO :&(-&fAQ"X2 0hEŒ̎u~0auO&Bdccj #6GHI^D8#`Ċg>xr Yxxn?<)b /_WOY_&0b6KZ|\׷vCz6ܬܽ_nyfxJW  gPiO/]\k午Lp4bG'aGQ㋪,c7-]4b7bww}\p.}s k&2/nsWl2/7yޭĭAV?|=z]'MO4I8}Kuߣ_?couvru>C_Lޭn6Fr~}(Q0DY{#pڤ+˃'&fmqʦB0 ;\1/2EFE6S'ugƿB.;֮cdwvA{Zv{jo}ދZS h𰀇5x, xbEz{@LPl\'Ġx汚 X hhhhhD0f?v~qDw t/6" 쭎$XNs`zBD HX i4N/M&J#,,,,,,,,,,Z4hhТEC.?N'H3 lp bXf BLHu[L]L]Ls=lqb nEJZd3H+>O+EAsasnE{-v"5L].n/vq|exBP->6 Olf(s$j];  dkՎkBvJDv0W:RT)V@mb7VPqzJX=MMT=@ z(L] |Tzl )\żdNE>`Q,QدK2x!]P3wܪpՆ|3)jv.ؐ7M5T<ܲS3yD9SnڽȎUG.GA7Gb{-ēDf]xߡ\ BqXQ`VE1ސ/mK֠*vؕYuH7(nur,9dcB~xw6, ,JJ`18Y>QGR` P۵Ђ,!0$ݍ./t pH uT&Nlׅo@mqHDm$7:#$Fv1,Uߌ;jlKjJj.yFF$OUKu%Quֶv]e{ %Nֱ˜S <6}| 5M.sCJ痫QwNu<u:uKWٍXAa]|vzbaa.}ac;22y/5uoOթkIv@s,t %9_=8![y!s<Qјek;Ep'~00ؑaYG~Ԓ7g5Ǘ <828967E2491ED7E51A441CADB4A191D6>] /Length 515 /Filter /FlateDecode >> stream xIlOQV<Ƣ5\SUicXH H,XYHH!w;{ %"˳,K$ht2 AaDDHFdonQDDW$|CA>b-"9$RDji"}h"+Bd˞"AͰ!G<}ſDkQH}E 1uE**o17X@D{1?}$bZ.f4bVbN( DQ)'XC,ub5K4 `6DڳXsh b}hmz1)FO?ͦb[zUbLtnk@ j[e5-rF\KqhiϷ+g],^Ǿ I{w}:Jg<޷l8wM>cqF\|-+r.w5oY endstream endobj startxref 40279 %%EOF proxy/inst/NEWS.Rd0000644000175100001440000001027514246417604013506 0ustar hornikusers\name{NEWS} \title{News for Package \pkg{proxy}} \section{Changes in version 0.4-27}{ \itemize{ \item cosmetic changes in help pages \item add angular distance/similarity; keep old behavior as cosine } } \section{Changes in version 0.4-26}{ \itemize{ \item Change coercion function for cosine similarity to distance to 1-x instead of 1-abs(x) } } \section{Changes in version 0.4-25}{ \itemize{ \item fix scaling for gower metric in case of cross-proximities to scale x and y together.y } } \section{Changes in version 0.4-24}{ \itemize{ \item fix tests in apply.R to comply to new default behavior of encoding character strings in data frames } } \section{Changes in version 0.4-23}{ \itemize{ \item smaller changes in C code to prevent false positives from automated CRAN code checking } } \section{Changes in version 0.4-21.1}{ \itemize{ \item Change registry class name to \code{proxy_registry} to prevent name clashes with registry package. } } \section{Changes in version 0.4-21}{ \itemize{ \item some more C-code \code{PROTECT}ing. } } \section{Changes in version 0.4-20}{ \itemize{ \item print method for \code{simil} objects added that prints \code{NA} instead of 0 for diagonal elements, if requested. } } \section{Changes in version 0.4-19}{ \itemize{ \item fix for binary distance (C-code): now has same behavior as the corresponding distance method in the \pkg{stats} package in case of missing values. } } \section{Changes in version 0.4-17}{ \itemize{ \item \code{pr_simil2dist} now computes \eqn{1 - abs(x)} instead of \eqn{1 - x}. } } \section{Changes in version 0.4-15}{ \itemize{ \item add imports from recommended packages. } } \section{Changes in version 0.4-14}{ \itemize{ \item export as.matrix() generic. } } \section{Changes in version 0.4-11}{ \itemize{ \item Make binary similarities work with binary data in case of list data (by coercing list to a matrix). } } \section{Changes in version 0.4-10}{ \itemize{ \item Bug in Fager/McGown measure fixed. } } \section{Changes in version 0.4-9}{ \itemize{ \item Author/Maintainer removed. } } \section{Changes in version 0.4-8}{ \itemize{ \item PACKAGE="proxy" argument added to all .External-calls. "PACKAGE" field added to the registry, defaulting to "proxy", and used by .Call(). } } \section{Changes in version 0.4-7}{ \itemize{ \item the method argument of \code{dist()} now also accepts a registry entry to avoid unnecessary lookups in repeated calls to \code{dist()}. } } \section{Changes in version 0.4-6}{ \itemize{ \item bugfix: \code{names<-.dist} did not accept \code{NULL} as assignment. } } \section{Changes in version 0.4-5}{ \itemize{ \item small vignette with an overview of existing methods added. } } \section{Changes in version 0.4-4}{ \itemize{ \item Podani, Chord, geodesic, Hellinger and Whittaker dissimilarities added. \item Fager/McGown anf Faith similarities added. } } \section{Changes in version 0.4-3}{ \itemize{ \item \code{pr_dist2simil} now computes \eqn{1 / (1 + x)} instead of \eqn{1 / (1 - x)} \item fix labeling bug for list data with named components \item the C-level dist functions now conform with those in package \pkg{stats} with respect to NA handling (pairwise-complete observations). } } \section{Changes in version 0.4-2}{ \itemize{ \item \code{as.matrix.dist} by default now uses 0 for diagonal elements } } \section{Changes in version 0.4}{ \itemize{ \item fix a bug in Kullback-Leibler-distance } } \section{Changes in version 0.3}{ \itemize{ \item fixed Gower-dissimilarity \item fixed dist/simil subsetting \item self-proximities now default to NA } } \section{Changes in version 0.2}{ \itemize{ \item data frames are auto-converted to matrices if all variables are either numeric, logical, or complex (to be compatible to \code{stats::dist}) \item pairwise distances can now be computed \item Registry is now case-insensitive } } \section{Changes in version 0.1}{ \itemize{ \item Initial release } }