proxy/0000755000176200001440000000000014057514472011444 5ustar liggesusersproxy/NAMESPACE0000644000176200001440000000175013261704567012670 0ustar liggesusersimport("stats") importFrom("utils", "formatUL") export("dist", "simil", "as.dist", "as.simil", "as.matrix", "pr_simil2dist", "pr_dist2simil", "row.dist", "col.dist", "rowSums.dist", "colSums.dist", "rowMeans.dist", "colMeans.dist", "pr_DB") S3method("dim", "dist") S3method("dimnames", "dist") S3method("dimnames<-", "dist") S3method("names", "dist") S3method("names<-", "dist") S3method("subset", "dist") S3method("[[", "dist") S3method("as.matrix", "dist") S3method("as.matrix", "simil") S3method("print","crossdist") S3method("print","pairdist") S3method("print","simil") S3method("summary","pr_DB") S3method("print","summary.pr_DB") ## generic registry stuff S3method("[[", "proxy_registry") S3method("length", "proxy_registry") S3method("print","proxy_registry") S3method("print","registry_field") S3method("print","registry_entry") S3method("summary","proxy_registry") S3method("as.data.frame","proxy_registry") useDynLib("proxy", .registration = TRUE) proxy/man/0000755000176200001440000000000013213571565012216 5ustar liggesusersproxy/man/rowSums.dist.Rd0000755000176200001440000000264710643260444015135 0ustar liggesusers\name{rowSums.dist} \alias{rowSums.dist} \alias{colSums.dist} \alias{rowMeans.dist} \alias{colMeans.dist} \title{Row Sums/Means of Sparse Symmetric Matrices} \description{ Compute the row (column) sums or means for a sparse symmetric (distance) matrix. } \usage{ rowSums.dist(x, na.rm = FALSE) rowMeans.dist(x, na.rm = FALSE, diag = TRUE) colSums.dist(x, na.rm = FALSE) colMeans.dist(x, na.rm = FALSE, diag = TRUE) } \arguments{ \item{x}{an object of class \code{dist}.} \item{na.rm}{logical, should missing values (including \code{NaN}) be omitted from the summation?} \item{diag}{logical, should the diagonal elements be included in the computation?} } \details{ These functions are more efficient than expanding an object of class \code{dist} to matrix and using \code{rowSums} or \code{rowMeans}. \code{colSums} and \code{colMeans} are provided for convenience. However, note that due to symmetry the result is always the same as for \code{rowSums} or \code{rowMeans}. } \value{ A numeric vector of row sums. } \author{Christian Buchta} \seealso{\code{as.matrix}, \code{as.dist}, and \code{rowSums}.} \examples{ ## x <- matrix(runif(10*2),ncol=2) d <- dist(x) rowSums(as.matrix(d)) rowSums.dist(d) # the same rowMeans(as.matrix(d)) rowMeans.dist(d) # the same rowMeans.dist(d, diag = FALSE) # not the same ## NAs d[3] <- NA rowSums.dist(d, na.rm = TRUE) rowMeans.dist(d, na.rm = TRUE) } \keyword{cluster} proxy/man/dist.Rd0000644000176200001440000001501013213571565013445 0ustar liggesusers\name{dist} \alias{dist} \alias{simil} \alias{print.simil} \alias{print.dist} \alias{print.crosssimil} \alias{print.crossdist} \alias{as.matrix.dist} \alias{as.matrix.simil} \alias{pr_simil2dist} \alias{pr_dist2simil} \alias{as.matrix} \alias{as.dist} \alias{as.simil} \alias{row.dist} \alias{col.dist} \title{Matrix Distance/Similarity Computation} \description{ These functions compute and return the auto-distance/similarity matrix between either rows or columns of a matrix/data frame, or a list, as well as the cross-distance matrix between two matrices/data frames/lists. } \usage{ dist(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_similarities = TRUE, auto_convert_data_frames = TRUE) simil(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_distances = TRUE, auto_convert_data_frames = TRUE) pr_dist2simil(x) pr_simil2dist(x) as.dist(x, FUN = NULL) as.simil(x, FUN = NULL) \method{as.matrix}{dist}(x, diag = 0, \dots) \method{as.matrix}{simil}(x, diag = NA, \dots) } \arguments{ \item{x}{For \code{dist} and \code{simil}, a numeric matrix object, a data frame, or a list. A vector will be converted into a column matrix. For \code{as.simil} and \code{as.dist}, an object of class \code{dist} and \code{simil}, respectively, or a numeric matrix. For \code{pr_dist2simil} and \code{pr_simil2dist}, any numeric vector.} \item{y}{\code{NULL}, or a similar object than \code{x}} \item{method}{a function, a registry entry, or a mnemonic string referencing the proximity measure. A list of all available measures can be obtained using \code{\link{pr_DB}} (see examples). The default for \code{dist} is \code{"Euclidean"}, and for \code{simil} \code{"correlation"}.} \item{diag}{logical value indicating whether the diagonal of the distance/similarity matrix should be printed by \code{\link{print.dist}}/\code{\link{print.simil}}. Note that the diagonal values are never stored in \code{dist} objects. In the context of \code{as.matrix} the value to use on the diagonal representing self-proximities. In case of similarities, this defaults to \code{NA} since a priori there are no upper bounds, so the maximum similarity needs to be specified by the user.} \item{upper}{logical value indicating whether the upper triangle of the distance/similarity matrix should be printed by \code{\link{print.dist}}/\code{\link{print.simil}}} \item{pairwise}{logical value indicating whether distances should be computed for the pairs of \code{x} and \code{y} only.} \item{by_rows}{logical indicating whether proximities between rows, or columns should be computed.} \item{convert_similarities, convert_distances}{logical indicating whether distances should be automatically converted into similarities (and the other way round) if needed.} \item{auto_convert_data_frames}{logical indicating whether data frames should be converted to matrices if all variables are numeric, or all are logical, or all are complex.} \item{FUN}{optional function to be used by \code{as.dist} and \code{as.simil}. If \code{NULL}, it is looked up in the method registry. If there is none specified there, \code{FUN} defaults to \code{pr_simil2dist} and \code{pr_dist2simil}, respectively.} \item{\dots}{further arguments passed to the proximity function.} } \details{ The interface is fashioned after \code{\link[stats]{dist}}, but can also compute cross-distances, and allows user extensions by means of registry of all proximity measures (see \code{\link{pr_DB}}). Missing values are allowed but are excluded from all computations involving the rows within which they occur. If some columns are excluded in calculating a Euclidean, Manhattan, Canberra or Minkowski distance, the sum is scaled up proportionally to the number of columns used (compare \code{\link[stats]{dist}} in package \pkg{stats}). Data frames are silently coerced to matrix if all columns are of (same) mode \code{numeric} or \code{logical}. Distance measures can be used with \code{simil}, and similarity measures with \code{dist}. In these cases, the result is transformed accordingly using the specified coercion functions (default: \eqn{pr\_simil2dist(x) = 1 - abs(x)} and \eqn{pr\_dist2simil(x) = 1 / (1 + x)}). Objects of class \code{simil} and \code{dist} can be converted one in another using \code{as.dist} and \code{as.simil}, respectively. Distance and similarity objects can conveniently be subset (see examples). Note that duplicate indexes are silently ignored. } \value{ Auto distances/similarities are returned as an object of class \code{dist}/\code{simil} and cross-distances/similarities as an object of class \code{crossdist}/\code{crosssimil}. } \references{ Anderberg, M.R. (1973), \emph{Cluster analysis for applications}, 359 pp., Academic Press, New York, NY, USA. Cox, M.F. and Cox, M.A.A. (2001), \emph{Multidimensional Scaling}, Chapman and Hall. Sokol, R.S. and Sneath P.H.A (1963), \emph{Principles of Numerical Taxonomy}, W. H. Freeman and Co., San Francisco. } \author{David Meyer \email{David.Meyer@R-project.org} and Christian Buchta \email{Christian.Buchta@wu-wien.ac.at}} \seealso{\code{\link[stats]{dist}} for compatibility information, and \code{\link{pr_DB}} for the proximity data base.} \examples{ ### show available proximities summary(pr_DB) ### get more information about a particular one pr_DB$get_entry("Jaccard") ### binary data x <- matrix(sample(c(FALSE, TRUE), 8, rep = TRUE), ncol = 2) dist(x, method = "Jaccard") ### for real-valued data dist(x, method = "eJaccard") ### for positive real-valued data dist(x, method = "fJaccard") ### cross distances dist(x, x, method = "Jaccard") ### pairwise (diagonal) dist(x, x, method = "Jaccard", pairwise = TRUE) ### this is the same but less efficient as.matrix(stats::dist(x, method = "binary")) ### numeric data x <- matrix(rnorm(16), ncol = 4) ## test inheritance of names rownames(x) <- LETTERS[1:4] colnames(x) <- letters[1:4] dist(x) dist(x, x) ## custom distance function f <- function(x, y) sum(x * y) dist(x, f) ## working with lists z <- unlist(apply(x, 1, list), recursive = FALSE) (d <- dist(z)) dist(z, z) ## subsetting d[[1:2]] subset(d, c(1,3,4)) d[[c(1,2,2)]] # duplicate index gets ignored ## transformations and self-proximities as.matrix(as.simil(d, function(x) exp(-x)), diag = 1) ## row and column indexes row.dist(d) col.dist(d) } \keyword{cluster} proxy/man/registry.Rd0000755000176200001440000002157510645442730014367 0ustar liggesusers\name{pr_DB} \alias{pr_DB} \alias{registry} \alias{summary.pr_DB} %- Also NEED an '\alias' for EACH other topic documented here. \title{Registry of proximities} \description{ Registry containing similarities and distances. } \usage{ pr_DB \special{pr_DB$get_field(name)} \special{pr_DB$get_fields()} \special{pr_DB$get_field_names()} \special{pr_DB$set_field(name, default = NA, type = NA, is_mandatory = FALSE, is_modifiable = TRUE, validity_FUN = NULL)} \special{pr_DB$entry_exists(name)} \special{pr_DB$get_entry(name)} \special{pr_DB$get_entries(name = NULL, pattern = NULL)} \special{pr_DB$get_entry_names(name)} \special{pr_DB$set_entry(...)} \special{pr_DB$modify_entry(...)} \special{pr_DB$delete_entry(name)} \method{summary}{pr_DB}(object, verbosity = c("short", "long"), ...) } %- maybe also 'usage' for other objects documented here. \arguments{ \item{name}{character string representing the name of an entry (case-insensitive).} \item{pattern}{regular expression to be matched to all fields of class \code{"character"} in all entries.} \item{default}{optional default value for the field.} \item{type}{optional character string specifying the class to be required for this field. If \code{type} is a character vector with more than two elements, the entries will be used as fixed set of alternatives. If \code{type} is not a character string or vector, the class will be inferred from the argument given.} \item{is_mandatory}{logical specifying whether new entries are required to have a value for this field.} \item{is_modifiable}{logical specifying whether entries can be changed with respect to that field.} \item{validity_FUN}{optional function or character string with the name of a function that checks the validity of a field entry. Such a function gets the value to be investigated as argument, and should stop with an error message if the value is not correct.} \item{object}{a registry object.} \item{verbosity}{controlling the verbosity of the output of the summary method for the registry. \code{"short"} gives just a list, \code{"long"} also gives the formulas.} \item{\dots}{for \code{pr_DB$set_entry} and \code{pr_DB$modify_entry}: named list of fields to be modified in or added to the registry (see details). This must include the index field (\code{"names"}).} } \details{ \code{pr_DB} represents the registry of all proximity measures available. For each measure, it comprises meta-information that can be queried and extended. Also, new measures can be added. This is done using the following accessor functions of the \code{pr_DB} object: \code{get_field_names()} returns a character vector with all field names. \code{get_field()} returns the information for a specific field as a list with components named as described above. \code{get_fields()} returns a list with all field entries. \code{set_field()} is used to create new fields in the repository (the default value will be set in all entries). \code{get_entry_names()} returns a character vector with (the first alias of) all entries. \code{entry_exists()} is a predicate checking if an entry with the specified alias exists in the registry. \code{get_entry()} returns the specified entry if it exists (and, by default, gives an error if it does not). \code{get_entries()} is used to query more than one entry: either those matching \code{name} exactly, or those where the regular expression in \code{pattern} matches \emph{any} character field in an entry. By default, all values are returned. \code{delete_entry} removes an existing entry from the registry (note that only user-provided entries can be deleted). \code{set_entry} and \code{modify_entry} require a named list of arguments used as field entries. At least the \code{names} index field is required. \code{set_entry} will check for all other mandatory fields. If specified in the field meta data, each field entry and the entry as a whole is checked for validity. Note that only user-specified fields and/or entries can be modified, the data shipped with the package are read-only. The registry fields currently available are as follows: \describe{ \item{FUN}{Function to register (see below).} \item{names}{Character vector with an alias(es) for the measure.} \item{PREFUN}{Optional function (or function name) for preprocessing code (see below).} \item{POSTFUN}{Optional function (or function name) for postprocessing code (see below).} \item{distance}{logical indicating whether this measure is a distance (\code{TRUE}) or similarity (\code{FALSE}).} \item{convert}{Optional Function or function name for converting between similarities and distances when needed.} \item{type}{Optional, the scale the measure applies to (\code{"metric"}, \code{"ordinal"}, \code{"nominal"}, \code{"binary"}, or \code{"other"}). If \code{NULL}, it is assumed to apply to some other unknown scale.} \item{loop}{logical indicating whether \code{FUN} is just a measure, and therefore, if \code{\link{dist}} shall do the loop over all pairs of observations/variables, or if \code{FUN} does the loop on its own.} \item{\code{C_FUN}}{logical indicating whether \code{FUN} is a C function.} \item{abcd}{logical; if \code{TRUE} and binary data (or data to be interpreted as such) are supplied, the number of concordant and discordant pairs is precomputed for every two binary data vectors and supplied to the measure function.} \item{formula}{Optional character string with the symbolic representation of the formula.} \item{reference}{Optional reference (character).} \item{description}{Optional description (character). Ideally, describes the context in which the measure can be applied.} } A function specified as \code{FUN} parameter has mandatory arguments \code{x} and \code{y} (if \code{abcd} is \code{FALSE}), and \code{a}, \code{b}, \code{c}, \code{d}, \code{n} otherwise. Additionally, it gets all optional parameters specified by the user in the \code{\dots} argument of the \code{\link{dist}} and \code{\link{simil}} functions, possibly changed and/or complemented by the corresponding (optional) \code{PREFUN} function. It must return the (diss-)similarity value computed from the arguments. \code{x} and \code{y} are two vectors from the data matrix (matrices) supplied. If \code{abcd} is \code{FALSE}, it is assumed that binary measures will be used, and the number of all \code{n} concordant and discordant pairs (x\_k, y\_k) precomputed and supplied instead of \code{x} and \code{y}. \code{a}, \code{b}, \code{c}, and \code{d} are the counts of all (TRUE, TRUE), (TRUE, FALSE), (FALSE, TRUE), and (FALSE, FALSE) pairs, respectively. A function specified as \code{PREFUN} parameter has mandatory arguments \code{x}, \code{y}, \code{p}, and \code{reg_entry}, with \code{y} and \code{p} possibly being \code{NULL} depending on the task at hand. \code{x} and \code{y} are the data objects, \code{p} is a (possibly empty) list with all specified proximity parameters, and \code{reg_entry} is the registry entry (a named list containing all information specified in \code{reg_add}). The preprocessing function is allowed to change all these information, and if so, is required to return *all* arguments as a named list in the same order. A function specified as \code{POSTFUN} parameter has two mandatory arguments: \code{result} and \code{p}. \code{result} will contain the computed raw data, i.e. a vector of length \eqn{n * (n - 1) / 2} for auto-distances (see \code{\link[stats]{dist}} for details on \code{dist} objects), or a matrix for cross-distances. \code{p} contains the specified proximity parameters. Post-processing functions need to return the \code{result} object (even if unmodified). A function specified as \code{convert} parameter should preserve the type of its argument. } \author{David Meyer \email{David.Meyer@R-project.org}} \seealso{\code{\link{dist}}} \examples{ ## create a new distance measure mydist <- function(x,y) x * y ## create a new entry in the registry with two aliases pr_DB$set_entry(FUN = mydist, names = c("test", "mydist")) ## look it up (index is case insensitive): pr_DB$get_entry("TEST") ## modify the content of the description field in the new entry pr_DB$modify_entry(names = "test", description = "foo function") ## create a new field pr_DB$set_field("New") ## look up the test entry again (two ways) pr_DB$get_entry("test") pr_DB[["test"]] ## show total number of entries length(pr_DB) ## show all entries (short list) pr_DB$get_entries(pattern = "foo") ## show more details summary(pr_DB, "long") ## get all entries in a list (and extract first two ones) pr_DB$get_entries()[1:2] ## get all entries as a data frame (select first 3 fields) as.data.frame(pr_DB)[,1:3] ## delete test entry pr_DB$delete_entry("test") ## check if it is really gone pr_DB$entry_exists("test") } \keyword{cluster}% __ONLY ONE__ keyword per line proxy/TODO0000755000176200001440000000037210702641354012132 0ustar liggesusers Possible future work o integrate computation on sparse matrices. o provide cutoff values in proximity computation. o provide computation of centroids. Possible problems o as.simil and as.dist change the objects class before calling FUN. proxy/DESCRIPTION0000644000176200001440000000150414057514472013152 0ustar liggesusersPackage: proxy Type: Package Title: Distance and Similarity Measures Version: 0.4-26 Authors@R: c(person(given = "David", family = "Meyer", role = c("aut", "cre"), email = "David.Meyer@R-project.org"), person(given = "Christian", family = "Buchta", role = "aut")) Description: Provides an extensible framework for the efficient calculation of auto- and cross-proximities, along with implementations of the most popular ones. Depends: R (>= 3.4.0) Imports: stats, utils Suggests: cba Collate: registry.R database.R dist.R similarities.R dissimilarities.R util.R seal.R License: GPL-2 NeedsCompilation: yes Packaged: 2021-06-06 14:08:54 UTC; meyer Author: David Meyer [aut, cre], Christian Buchta [aut] Maintainer: David Meyer Repository: CRAN Date/Publication: 2021-06-07 22:10:02 UTC proxy/build/0000755000176200001440000000000014057153366012544 5ustar liggesusersproxy/build/vignette.rds0000644000176200001440000000040214057153366015077 0ustar liggesusersuQn0 MiǠbb?Xq*ZYk* tm [8γB!͔3@DbB $U{IVV`JRҽ9xCYh# yRֶFW!oPA|\c*W,XnCy-s3 {Ͷ6 t-'P=K[onnZ$COݿ~9g`98H ;2proxy/tests/0000755000176200001440000000000013651074225012602 5ustar liggesusersproxy/tests/distance.Rout.save0000644000176200001440000002627713175160705016222 0ustar liggesusers R version 3.4.2 (2017-09-28) -- "Short Summer" Copyright (C) 2017 The R Foundation for Statistical Computing Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ### ceeboo 2007 > > ## todo: special values NaN, NA, and Inf > > library("proxy") Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20070630) > > ## > > x <- matrix(runif(20),5,4) > inherits(x, "matrix") [1] TRUE > rownames(x) <- LETTERS[1:5] > y <- x > y[2,] <- x[1,] <- 0 > > x [,1] [,2] [,3] [,4] A 0.0000000 0.000000000 0.0000000 0.0000000 B 0.8390691 0.002929016 0.3449861 0.4751930 C 0.0377036 0.300615279 0.1399626 0.5143060 D 0.7621676 0.963274350 0.4358104 0.7934745 E 0.1211588 0.452265898 0.8005272 0.3062947 > y [,1] [,2] [,3] [,4] A 0.4595829 0.2462831 0.6662151 0.2658000 B 0.0000000 0.0000000 0.0000000 0.0000000 C 0.0377036 0.3006153 0.1399626 0.5143060 D 0.7621676 0.9632744 0.4358104 0.7934745 E 0.1211588 0.4522659 0.8005272 0.3062947 > > ## user interfaces > > r <- .Call("R_minkowski_dist", x, NULL, FALSE, 1, PACKAGE = "proxy") > all.equal(c(r), c(stats::dist(x, method = "minkowski", p = 1))) [1] TRUE > r A B C D B 1.6621772 C 0.9925875 1.3431883 D 2.9547267 1.4463526 1.9621392 E 1.6802466 1.7916865 1.1036817 2.0039138 > .Call("R_minkowski_dist", x, x, FALSE, 1, PACKAGE = "proxy") A B C D E A 0.0000000 1.662177 0.9925875 2.954727 1.680247 B 1.6621772 0.000000 1.3431883 1.446353 1.791686 C 0.9925875 1.343188 0.0000000 1.962139 1.103682 D 2.9547267 1.446353 1.9621392 0.000000 2.003914 E 1.6802466 1.791686 1.1036817 2.003914 0.000000 > .Call("R_minkowski_dist", x, y, FALSE, 1, PACKAGE = "proxy") A B C D E A 1.6378810 0.0000000 0.9925875 2.954727 1.680247 B 1.1534623 1.6621772 1.3431883 1.446353 1.791686 C 1.2509699 0.9925875 0.0000000 1.962139 1.103682 D 1.7776551 2.9547267 1.9621392 0.000000 2.003914 E 0.7192137 1.6802466 1.1036817 2.003914 0.000000 > .Call("R_minkowski_dist", x, y, TRUE, 1, PACKAGE = "proxy") [1] 1.637881 1.662177 0.000000 0.000000 0.000000 > > dfun <- paste("R",c("euclidean", "maximum", "manhattan", "canberra", "binary", "matching", "fuzzy", "mutual"),"dist", sep = "_") > > for (f in dfun) { + cat("\nTesting ",f,"\n\n",sep="") + r <- try(do.call(".Call", list(f, x, NULL, FALSE, PACKAGE = "proxy"))) + if ( inherits(r, "try-error")) + next + s <- try(stats::dist(x, method = gsub("R_|_dist", "", f))) + if (!inherits(s, "try-error")) + print(all.equal(c(r), c(s))) + print(r) + print(do.call(".Call", list(f, x, x, FALSE, PACKAGE = "proxy"))) + print(do.call(".Call", list(f, x, y, FALSE, PACKAGE = "proxy"))) + print(do.call(".Call", list(f, x, y, TRUE, PACKAGE = "proxy"))) + } Testing R_euclidean_dist [1] TRUE A B C D B 1.0241432 C 0.6130998 0.8799820 D 1.5258864 1.0186899 1.0627446 E 0.9766699 0.9763929 0.7138466 1.0209722 A B C D E A 0.0000000 1.0241432 0.6130998 1.525886 0.9766699 B 1.0241432 0.0000000 0.8799820 1.018690 0.9763929 C 0.6130998 0.8799820 0.0000000 1.062745 0.7138466 D 1.5258864 1.0186899 1.0627446 0.000000 1.0209722 E 0.9766699 0.9763929 0.7138466 1.020972 0.0000000 A B C D E A 0.8867716 0.0000000 0.6130998 1.525886 0.9766699 B 0.5918315 1.0241432 0.8799820 1.018690 0.9763929 C 0.7208543 0.6130998 0.0000000 1.062745 0.7138466 D 0.9680706 1.5258864 1.0627446 0.000000 1.0209722 E 0.4202848 0.9766699 0.7138466 1.020972 0.0000000 [1] 0.8867716 1.0241432 0.0000000 0.0000000 0.0000000 Testing R_maximum_dist [1] TRUE A B C D B 0.8390691 C 0.5143060 0.8013655 D 0.9632744 0.9603453 0.7244640 E 0.8005272 0.7179103 0.6605646 0.6410087 A B C D E A 0.0000000 0.8390691 0.5143060 0.9632744 0.8005272 B 0.8390691 0.0000000 0.8013655 0.9603453 0.7179103 C 0.5143060 0.8013655 0.0000000 0.7244640 0.6605646 D 0.9632744 0.9603453 0.7244640 0.0000000 0.6410087 E 0.8005272 0.7179103 0.6605646 0.6410087 0.0000000 A B C D E A 0.6662151 0.0000000 0.5143060 0.9632744 0.8005272 B 0.3794862 0.8390691 0.8013655 0.9603453 0.7179103 C 0.5262524 0.5143060 0.0000000 0.7244640 0.6605646 D 0.7169912 0.9632744 0.7244640 0.0000000 0.6410087 E 0.3384240 0.8005272 0.6605646 0.6410087 0.0000000 [1] 0.6662151 0.8390691 0.0000000 0.0000000 0.0000000 Testing R_manhattan_dist [1] TRUE A B C D B 1.6621772 C 0.9925875 1.3431883 D 2.9547267 1.4463526 1.9621392 E 1.6802466 1.7916865 1.1036817 2.0039138 A B C D E A 0.0000000 1.662177 0.9925875 2.954727 1.680247 B 1.6621772 0.000000 1.3431883 1.446353 1.791686 C 0.9925875 1.343188 0.0000000 1.962139 1.103682 D 2.9547267 1.446353 1.9621392 0.000000 2.003914 E 1.6802466 1.791686 1.1036817 2.003914 0.000000 A B C D E A 1.6378810 0.0000000 0.9925875 2.954727 1.680247 B 1.1534623 1.6621772 1.3431883 1.446353 1.791686 C 1.2509699 0.9925875 0.0000000 1.962139 1.103682 D 1.7776551 2.9547267 1.9621392 0.000000 2.003914 E 0.7192137 1.6802466 1.1036817 2.003914 0.000000 [1] 1.637881 1.662177 0.000000 0.000000 0.000000 Testing R_canberra_dist [1] TRUE A B C D B 4.000000 C 4.000000 2.356997 D 4.000000 1.409165 2.157322 E 4.000000 2.348575 1.682606 1.824656 A B C D E A NA 4.000000 4.000000 4.000000 4.000000 B 4 0.000000 2.356997 1.409165 2.348575 C 4 2.356997 0.000000 2.157322 1.682606 D 4 1.409165 2.157322 0.000000 1.824656 E 4 2.348575 1.682606 1.824656 0.000000 A B C D E A 4.000000 NA 4.000000 4.000000 4.000000 B 1.868964 4 2.356997 1.409165 2.348575 C 1.919038 4 0.000000 2.157322 1.682606 D 1.547657 4 2.157322 0.000000 1.824656 E 1.039972 4 1.682606 1.824656 0.000000 [1] 4 4 0 0 0 Testing R_binary_dist [1] TRUE A B C D B 1 C 1 0 D 1 0 0 E 1 0 0 0 A B C D E A 0 1 1 1 1 B 1 0 0 0 0 C 1 0 0 0 0 D 1 0 0 0 0 E 1 0 0 0 0 A B C D E A 1 0 1 1 1 B 0 1 0 0 0 C 0 1 0 0 0 D 0 1 0 0 0 E 0 1 0 0 0 [1] 1 1 0 0 0 Testing R_matching_dist Error in .Call("R_matching_dist", structure(c(0, 0.839069103589281, 0.037703595822677, : "R_matching_dist" not available for .Call() for package "proxy" Testing R_fuzzy_dist Error in stats::dist(x, method = gsub("R_|_dist", "", f)) : invalid distance method A B C D B 1.0000000 C 1.0000000 0.6719380 D 1.0000000 0.4770877 0.6640679 E 1.0000000 0.6979540 0.5844973 0.6036897 A B C D E A 0 1.0000000 1.0000000 1.0000000 1.0000000 B 1 0.0000000 0.6719380 0.4770877 0.6979540 C 1 0.6719380 0.0000000 0.6640679 0.5844973 D 1 0.4770877 0.6640679 0.0000000 0.6036897 E 1 0.6979540 0.5844973 0.6036897 0.0000000 A B C D E A 1.0000000 0 1.0000000 1.0000000 1.0000000 B 0.5180002 1 0.6719380 0.4770877 0.6979540 C 0.6445909 1 0.0000000 0.6640679 0.5844973 D 0.5581104 1 0.6640679 0.0000000 0.6036897 E 0.3562808 1 0.5844973 0.6036897 0.0000000 [1] 1 1 0 0 0 Testing R_mutual_dist Error in .Call("R_mutual_dist", structure(c(0, 0.839069103589281, 0.037703595822677, : "R_mutual_dist" not available for .Call() for package "proxy" > > ## no longer optimized > > .Call("R_ejaccard", x, NULL, FALSE, PACKAGE = "proxy") A B C D B 0.0000000 C 0.0000000 0.2957500 D 0.0000000 0.5298998 0.4107824 E 0.0000000 0.3549987 0.4459170 0.5179277 > .Call("R_ejaccard", x, x, FALSE, PACKAGE = "proxy") A B C D E A 1 0.0000000 0.0000000 0.0000000 0.0000000 B 0 1.0000000 0.2957500 0.5298998 0.3549987 C 0 0.2957500 1.0000000 0.4107824 0.4459170 D 0 0.5298998 0.4107824 1.0000000 0.5179277 E 0 0.3549987 0.4459170 0.5179277 1.0000000 > .Call("R_ejaccard", x, y, FALSE, PACKAGE = "proxy") A B C D E A 0.0000000 1 0.0000000 0.0000000 0.0000000 B 0.6794648 0 0.2957500 0.5298998 0.3549987 C 0.3820854 0 1.0000000 0.4107824 0.4459170 D 0.5374164 0 0.4107824 1.0000000 0.5179277 E 0.8157020 0 0.4459170 0.5179277 1.0000000 > .Call("R_ejaccard", x, y, TRUE, PACKAGE = "proxy") [1] 0 0 1 1 1 > > .Call("R_cosine", x, NULL, FALSE, PACKAGE = "proxy") A B C D B 0.0000000 C 0.0000000 0.5179092 D 0.0000000 0.7485219 0.8416675 E 0.0000000 0.5245741 0.6848730 0.7514756 > .Call("R_cosine", x, x, FALSE, PACKAGE = "proxy") A B C D E A 1 0.0000000 0.0000000 0.0000000 0.0000000 B 0 1.0000000 0.5179092 0.7485219 0.5245741 C 0 0.5179092 1.0000000 0.8416675 0.6848730 D 0 0.7485219 0.8416675 1.0000000 0.7514756 E 0 0.5245741 0.6848730 0.7514756 1.0000000 > .Call("R_cosine", x, y, FALSE, PACKAGE = "proxy") A B C D E A 0.0000000 1 0.0000000 0.0000000 0.0000000 B 0.8175510 0 0.5179092 0.7485219 0.5245741 C 0.5909955 0 1.0000000 0.8416675 0.6848730 D 0.8046383 0 0.8416675 1.0000000 0.7514756 E 0.9026897 0 0.6848730 0.7514756 1.0000000 > .Call("R_cosine", x, y, TRUE, PACKAGE = "proxy") [1] 0 0 1 1 1 > > x <- matrix(x > 0.5, 5,4) > y <- matrix(y > 0.5, 5,4) > > .Call("R_bjaccard", x, NULL, FALSE, PACKAGE = "proxy") 1 2 3 4 2 0.0000000 3 0.0000000 0.0000000 4 0.0000000 0.3333333 0.3333333 5 0.0000000 0.0000000 0.0000000 0.0000000 > .Call("R_bjaccard", x, x, FALSE, PACKAGE = "proxy") [,1] [,2] [,3] [,4] [,5] [1,] 1 0.0000000 0.0000000 0.0000000 0 [2,] 0 1.0000000 0.0000000 0.3333333 0 [3,] 0 0.0000000 1.0000000 0.3333333 0 [4,] 0 0.3333333 0.3333333 1.0000000 0 [5,] 0 0.0000000 0.0000000 0.0000000 1 > .Call("R_bjaccard", x, y, FALSE, PACKAGE = "proxy") [,1] [,2] [,3] [,4] [,5] [1,] 0 1 0.0000000 0.0000000 0 [2,] 0 0 0.0000000 0.3333333 0 [3,] 0 0 1.0000000 0.3333333 0 [4,] 0 0 0.3333333 1.0000000 0 [5,] 1 0 0.0000000 0.0000000 1 > .Call("R_bjaccard", x, y, TRUE, PACKAGE = "proxy") [1] 0 0 1 1 1 > > ### > > proc.time() user system elapsed 0.288 0.040 0.325 proxy/tests/util.Rout.save0000644000176200001440000000577513037764465015417 0ustar liggesusers R version 3.1.2 (2014-10-31) -- "Pumpkin Helmet" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ## test C interfaces > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20070630) > > x <- as.dist(matrix(runif(25),5,5)) > x 1 2 3 4 2 0.8390691 3 0.0377036 0.3006153 4 0.7621676 0.9632744 0.4358104 5 0.1211588 0.4522659 0.8005272 0.3062947 > attributes(x) $Size [1] 5 $call as.dist.default(m = x) $class [1] "dist" $Diag [1] FALSE $Upper [1] FALSE > > z <- .Call(proxy:::R_subset_dist, x, 3) > z dist(0) > > unclass(z) numeric(0) attr(,"Size") [1] 1 attr(,"call") as.dist.default(m = x) attr(,"Diag") [1] FALSE attr(,"Upper") [1] FALSE > > .Call(proxy:::R_subset_dist, x, c(1,3,5)) 1 2 2 0.0377036 3 0.1211588 0.8005272 > > attr(x, "Labels") <- LETTERS[1:5] > > z <- .Call(proxy:::R_subset_dist, x, c("A","C","E")) > z A C C 0.0377036 E 0.1211588 0.8005272 > attributes(z) $Size [1] 3 $call as.dist.default(m = x) $class [1] "dist" $Diag [1] FALSE $Upper [1] FALSE $Labels [1] "A" "C" "E" > > attr(x, "Labels") <- NULL > > .Call(proxy:::R_rowSums_dist, x, FALSE) [1] 1.760099 2.555225 1.574656 2.467547 1.680247 > .Call(proxy:::R_rowSums_dist, z, FALSE) A C E 0.1588624 0.8382308 0.9216860 > > .Call(proxy:::R_row_dist, x, FALSE) # row() [1] 2 3 4 5 3 4 5 4 5 5 > .Call(proxy:::R_row_dist, x, TRUE) # col() [1] 1 1 1 1 2 2 2 3 3 4 > > ## test R interfaces > > dim(x) [1] 5 5 > dimnames(x) <- letters[1:5] > dimnames(x) [1] "a" "b" "c" "d" "e" > names(x) <- LETTERS[1:5] > names(x) [1] "A" "B" "C" "D" "E" > > row.dist(x) [1] 2 3 4 5 3 4 5 4 5 5 > col.dist(x) [1] 1 1 1 1 2 2 2 3 3 4 > > subset(x, c(1,3,5)) A C C 0.0377036 E 0.1211588 0.8005272 > x[[c(1,3,5)]] A C C 0.0377036 E 0.1211588 0.8005272 > x[c(1,3,5)] # as usual [1] 0.8390691 0.7621676 0.3006153 > > x[[-1]] # drop subscripts B C D C 0.3006153 D 0.9632744 0.4358104 E 0.4522659 0.8005272 0.3062947 > > x[[1]] # empty dist(0) > > ### > > proc.time() user system elapsed 0.193 0.030 0.216 proxy/tests/util.R0000644000176200001440000000151213037764465013713 0ustar liggesusers ## test C interfaces library(proxy) set.seed(20070630) x <- as.dist(matrix(runif(25),5,5)) x attributes(x) z <- .Call(proxy:::R_subset_dist, x, 3) z unclass(z) .Call(proxy:::R_subset_dist, x, c(1,3,5)) attr(x, "Labels") <- LETTERS[1:5] z <- .Call(proxy:::R_subset_dist, x, c("A","C","E")) z attributes(z) attr(x, "Labels") <- NULL .Call(proxy:::R_rowSums_dist, x, FALSE) .Call(proxy:::R_rowSums_dist, z, FALSE) .Call(proxy:::R_row_dist, x, FALSE) # row() .Call(proxy:::R_row_dist, x, TRUE) # col() ## test R interfaces dim(x) dimnames(x) <- letters[1:5] dimnames(x) names(x) <- LETTERS[1:5] names(x) row.dist(x) col.dist(x) subset(x, c(1,3,5)) x[[c(1,3,5)]] x[c(1,3,5)] # as usual x[[-1]] # drop subscripts x[[1]] # empty ### proxy/tests/distcalls.Rout.save0000644000176200001440000001026213037764465016407 0ustar liggesusers R version 3.3.0 (2016-05-03) -- "Supposedly Educational" Copyright (C) 2016 The R Foundation for Statistical Computing Platform: i686-pc-linux-gnu (32-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ################################# > ## Test for dist calls > ################################## > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20140107) > > ## get all measures > proxies = pr_DB$get_entry_names() > > ## remove special cases > proxies = setdiff(proxies, c("Mahalanobis", "Minkowski", "Stiles", "Levenshtein", "fJaccard")) > > ## create test data > x = matrix(1:100, 10) > > ## test function: checks if dist(x) == dist(x,x) for all measures, > ## and if diag(dist(x, x)) == diag(x, x, pairwise = TRUE) > prtest <- function(...) { + CD <- dist(x, x, ...) + all(as.matrix(dist(x, ...)) == CD) && + all(diag(CD) == dist(x, x, pairwise = TRUE, ...)) + } > > ## loop over all measures (except special cases) > for (i in proxies) + {cat(i); prtest(i); cat(": OK.\n")} Jaccard: OK. Kulczynski1: OK. Kulczynski2: OK. Mountford: OK. Fager: OK. Russel: OK. simple matching: OK. Hamman: OK. Faith: OK. Tanimoto: OK. Dice: OK. Phi: OK. Michael: OK. Mozley: OK. Yule: OK. Yule2: OK. Ochiai: OK. Simpson: OK. Braun-Blanquet: OK. cosine: OK. eJaccard: OK. eDice: OK. correlation: OK. Chi-squared: OK. Phi-squared: OK. Tschuprow: OK. Cramer: OK. Pearson: OK. Gower: OK. Euclidean: OK. Bhjattacharyya: OK. Manhattan: OK. supremum: OK. Canberra: OK. Wave: OK. divergence: OK. Kullback: OK. Bray: OK. Soergel: OK. Podani: OK. Chord: OK. Geodesic: OK. Whittaker: OK. Hellinger: OK. > > ## Minkowski > for (j in c(0.5, 1, 2, 3, Inf)) + {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} Minkowski: p = 0.5: OK. Minkowski: p = 1: OK. Minkowski: p = 2: OK. Minkowski: p = 3: OK. Minkowski: p = Inf: OK. > > ## Mahalanobis (need non-singular matrix) > x = as.matrix(iris[1:50,-5]) > prtest("Mahalanobis") [1] TRUE > > ## fJaccard (needs values in unit interval) > x = as.matrix(1:100/100, 10) > prtest("fJaccard") [1] TRUE > > ## produce binary matrix > x = matrix(rbinom(100,1,0.7), 10) > > ## Stiles (gives a lot of warnings due to log) > tmp = dist(x, "Stiles") There were 27 warnings (use warnings() to see them) > tmp = dist(x, x, "Stiles") There were 50 or more warnings (use warnings() to see the first 50) > > ## try again (almost) all measures, this time with binary data to check > ## conversions > for (i in proxies) + {cat(i); prtest(i); cat(": OK.\n")} Jaccard: OK. Kulczynski1: OK. Kulczynski2: OK. Mountford: OK. Fager: OK. Russel: OK. simple matching: OK. Hamman: OK. Faith: OK. Tanimoto: OK. Dice: OK. Phi: OK. Michael: OK. Mozley: OK. Yule: OK. Yule2: OK. Ochiai: OK. Simpson: OK. Braun-Blanquet: OK. cosine: OK. eJaccard: OK. eDice: OK. correlation: OK. Chi-squared: OK. Phi-squared: OK. Tschuprow: OK. Cramer: OK. Pearson: OK. Gower: OK. Euclidean: OK. Bhjattacharyya: OK. Manhattan: OK. supremum: OK. Canberra: OK. Wave: OK. divergence: OK. Kullback: OK. Bray: OK. Soergel: OK. Podani: OK. Chord: OK. Geodesic: OK. Whittaker: OK. Hellinger: OK. > ## Minkowski > for (j in c(0.5, 1, 2, 3, Inf)) + {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} Minkowski: p = 0.5: OK. Minkowski: p = 1: OK. Minkowski: p = 2: OK. Minkowski: p = 3: OK. Minkowski: p = Inf: OK. > > ## Levenshtein distance > s <- c("A", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog") > all(as.matrix(dist(s, "Levenshtein")) == dist(s, s, "Levenshtein")) [1] TRUE > > ## Test auto-conversion > x = iris[,-5] > prtest() [1] TRUE > > > > proc.time() user system elapsed 1.960 0.020 1.974 proxy/tests/registry.R0000755000176200001440000000353210643260444014601 0ustar liggesusers########################## ### registry test instances library(proxy) .my_check_fun <- function(x) if (x$Z == 999 && x$New2 == 999) stop("No evil allowed!") ## create registry R <- proxy:::registry(entry_class = "simple.list", validity_FUN = .my_check_fun) R ## set fields R$set_field("X", type = TRUE, is_mandatory = TRUE) R$set_field("Y", type = "character") R$set_field("Z", default = 123) R$get_fields() ## add entries R$set_entry(names = "test", X = TRUE, Y = "bla") R$set_entry(names = "test2", X = FALSE, Y = "foo", Z = 99) R$set_entry(names = "test3", X = FALSE, Y = "bar", Z = "chars") R$get_entry("test") R[["test2"]] R[["test3"]] ## add new field R$set_field("New") R$get_field("New") ## change entries R$modify_entry(names = "test", New = 123) R$modify_entry(names = "test2", New = "test") ## field check function (checks for strict positive values) R$set_field("New2", type = "numeric", validity_FUN = function(x) stopifnot(x > 0)) R$set_entry(names = "test5", X = TRUE, New2 = 2) ## add field with fixed alternatives R$set_field("New3", type = c("A", "B")) R$get_field("New") R$set_entry(names = "test6", X = TRUE, New3 = "A") ## print/summary = as.data.frame R summary(R) ## seal entries R$seal_entries() R$set_field("New4") R$set_entry(names = "test7", X = TRUE, Y = "bla") R$delete_entry("test7") R$modify_entry(names = "test", New4 = "test") ## error cases: TRY <- function(...) stopifnot(inherits(try(..., silent = TRUE), "try-error")) TRY(R$set_field("bla", type = "character", default = 123)) TRY(R$set_entry("err1", Y = "bla")) TRY(R$set_entry("err2", X = "bla")) TRY(R$set_entry("err3", X = TRUE, New2 = -2)) TRY(R$set_entry("err4", X = TRUE, Z = 999, New2 = 999)) TRY(R$set_entry("err5", X = TRUE, New3 = "C")) TRY(R$modify_entry("Bla", "New", 123)) TRY(R$modify_entry("X", "Bla", 123)) TRY(R$modify_entry("test","X",TRUE)) proxy/tests/distance.R0000644000176200001440000000357513175160705014531 0ustar liggesusers ### ceeboo 2007 ## todo: special values NaN, NA, and Inf library("proxy") set.seed(20070630) ## x <- matrix(runif(20),5,4) inherits(x, "matrix") rownames(x) <- LETTERS[1:5] y <- x y[2,] <- x[1,] <- 0 x y ## user interfaces r <- .Call("R_minkowski_dist", x, NULL, FALSE, 1, PACKAGE = "proxy") all.equal(c(r), c(stats::dist(x, method = "minkowski", p = 1))) r .Call("R_minkowski_dist", x, x, FALSE, 1, PACKAGE = "proxy") .Call("R_minkowski_dist", x, y, FALSE, 1, PACKAGE = "proxy") .Call("R_minkowski_dist", x, y, TRUE, 1, PACKAGE = "proxy") dfun <- paste("R",c("euclidean", "maximum", "manhattan", "canberra", "binary", "matching", "fuzzy", "mutual"),"dist", sep = "_") for (f in dfun) { cat("\nTesting ",f,"\n\n",sep="") r <- try(do.call(".Call", list(f, x, NULL, FALSE, PACKAGE = "proxy"))) if ( inherits(r, "try-error")) next s <- try(stats::dist(x, method = gsub("R_|_dist", "", f))) if (!inherits(s, "try-error")) print(all.equal(c(r), c(s))) print(r) print(do.call(".Call", list(f, x, x, FALSE, PACKAGE = "proxy"))) print(do.call(".Call", list(f, x, y, FALSE, PACKAGE = "proxy"))) print(do.call(".Call", list(f, x, y, TRUE, PACKAGE = "proxy"))) } ## no longer optimized .Call("R_ejaccard", x, NULL, FALSE, PACKAGE = "proxy") .Call("R_ejaccard", x, x, FALSE, PACKAGE = "proxy") .Call("R_ejaccard", x, y, FALSE, PACKAGE = "proxy") .Call("R_ejaccard", x, y, TRUE, PACKAGE = "proxy") .Call("R_cosine", x, NULL, FALSE, PACKAGE = "proxy") .Call("R_cosine", x, x, FALSE, PACKAGE = "proxy") .Call("R_cosine", x, y, FALSE, PACKAGE = "proxy") .Call("R_cosine", x, y, TRUE, PACKAGE = "proxy") x <- matrix(x > 0.5, 5,4) y <- matrix(y > 0.5, 5,4) .Call("R_bjaccard", x, NULL, FALSE, PACKAGE = "proxy") .Call("R_bjaccard", x, x, FALSE, PACKAGE = "proxy") .Call("R_bjaccard", x, y, FALSE, PACKAGE = "proxy") .Call("R_bjaccard", x, y, TRUE, PACKAGE = "proxy") ### proxy/tests/registry.Rout.save0000644000176200001440000000757412464233322016273 0ustar liggesusers R version 3.1.2 (2014-10-31) -- "Pumpkin Helmet" Copyright (C) 2014 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > ########################## > ### registry test instances > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > .my_check_fun <- function(x) if (x$Z == 999 && x$New2 == 999) stop("No evil allowed!") > > ## create registry > R <- proxy:::registry(entry_class = "simple.list", + validity_FUN = .my_check_fun) > R An object of class "registry" with no entry. > > ## set fields > R$set_field("X", type = TRUE, is_mandatory = TRUE) > R$set_field("Y", type = "character") > R$set_field("Z", default = 123) > R$get_fields() $names type character default NA is_mandatory TRUE is_modifiable TRUE validity_FUN NULL $X type logical default NA is_mandatory TRUE is_modifiable TRUE validity_FUN NULL $Y type character default NA is_mandatory FALSE is_modifiable TRUE validity_FUN NULL $Z type NA default 123 is_mandatory FALSE is_modifiable TRUE validity_FUN NULL > > ## add entries > R$set_entry(names = "test", X = TRUE, Y = "bla") > R$set_entry(names = "test2", X = FALSE, Y = "foo", Z = 99) > R$set_entry(names = "test3", X = FALSE, Y = "bar", Z = "chars") > R$get_entry("test") _ names test X TRUE Y bla Z 123 > R[["test2"]] _ names test2 X FALSE Y foo Z 99 > R[["test3"]] _ names test3 X FALSE Y bar Z chars > > ## add new field > R$set_field("New") > R$get_field("New") type NA default NA is_mandatory FALSE is_modifiable TRUE validity_FUN NULL > > ## change entries > R$modify_entry(names = "test", New = 123) > R$modify_entry(names = "test2", New = "test") > > ## field check function (checks for strict positive values) > R$set_field("New2", type = "numeric", validity_FUN = function(x) stopifnot(x > 0)) > R$set_entry(names = "test5", X = TRUE, New2 = 2) > > ## add field with fixed alternatives > R$set_field("New3", type = c("A", "B")) > R$get_field("New") type NA default NA is_mandatory FALSE is_modifiable TRUE validity_FUN NULL > R$set_entry(names = "test6", X = TRUE, New3 = "A") > > ## print/summary = as.data.frame > R An object of class "registry" with 5 entries. > summary(R) X Y Z New New2 New3 test TRUE bla 123 123 NA test2 FALSE foo 99 test NA test3 FALSE bar chars NA test5 TRUE 123 2 test6 TRUE 123 NA A > > ## seal entries > R$seal_entries() > R$set_field("New4") > R$set_entry(names = "test7", X = TRUE, Y = "bla") > R$delete_entry("test7") > R$modify_entry(names = "test", New4 = "test") > > ## error cases: > TRY <- function(...) stopifnot(inherits(try(..., silent = TRUE), "try-error")) > TRY(R$set_field("bla", type = "character", default = 123)) > TRY(R$set_entry("err1", Y = "bla")) > TRY(R$set_entry("err2", X = "bla")) > TRY(R$set_entry("err3", X = TRUE, New2 = -2)) > TRY(R$set_entry("err4", X = TRUE, Z = 999, New2 = 999)) > TRY(R$set_entry("err5", X = TRUE, New3 = "C")) > TRY(R$modify_entry("Bla", "New", 123)) > TRY(R$modify_entry("X", "Bla", 123)) > TRY(R$modify_entry("test","X",TRUE)) > > proc.time() user system elapsed 0.226 0.011 0.230 proxy/tests/distcalls.R0000644000176200001440000000322613037764465014724 0ustar liggesusers################################# ## Test for dist calls ################################## library(proxy) set.seed(20140107) ## get all measures proxies = pr_DB$get_entry_names() ## remove special cases proxies = setdiff(proxies, c("Mahalanobis", "Minkowski", "Stiles", "Levenshtein", "fJaccard")) ## create test data x = matrix(1:100, 10) ## test function: checks if dist(x) == dist(x,x) for all measures, ## and if diag(dist(x, x)) == diag(x, x, pairwise = TRUE) prtest <- function(...) { CD <- dist(x, x, ...) all(as.matrix(dist(x, ...)) == CD) && all(diag(CD) == dist(x, x, pairwise = TRUE, ...)) } ## loop over all measures (except special cases) for (i in proxies) {cat(i); prtest(i); cat(": OK.\n")} ## Minkowski for (j in c(0.5, 1, 2, 3, Inf)) {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} ## Mahalanobis (need non-singular matrix) x = as.matrix(iris[1:50,-5]) prtest("Mahalanobis") ## fJaccard (needs values in unit interval) x = as.matrix(1:100/100, 10) prtest("fJaccard") ## produce binary matrix x = matrix(rbinom(100,1,0.7), 10) ## Stiles (gives a lot of warnings due to log) tmp = dist(x, "Stiles") tmp = dist(x, x, "Stiles") ## try again (almost) all measures, this time with binary data to check ## conversions for (i in proxies) {cat(i); prtest(i); cat(": OK.\n")} ## Minkowski for (j in c(0.5, 1, 2, 3, Inf)) {cat("Minkowski: p =", j); prtest("Minkowski", p = j); cat(": OK.\n")} ## Levenshtein distance s <- c("A", "quick", "brown", "fox", "jumps", "over", "the", "lazy", "dog") all(as.matrix(dist(s, "Levenshtein")) == dist(s, s, "Levenshtein")) ## Test auto-conversion x = iris[,-5] prtest() proxy/tests/apply.R0000644000176200001440000000543413651074111014052 0ustar liggesusers ## tests on apply C wrappers library(proxy) set.seed(20070630) ## matrix f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) x <- matrix(runif(20), 5, 4) x y <- matrix(runif(20), 5, 4) y .External(proxy:::R_apply_dist_matrix, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_matrix, x, x, FALSE, f) .External(proxy:::R_apply_dist_matrix, x, y, FALSE, f) .External(proxy:::R_apply_dist_matrix, x, y, TRUE, f) # coerce z <- y * 100 storage.mode(z) <- "integer" .External(proxy:::R_apply_dist_matrix, x, z, FALSE, f) .External(proxy:::R_apply_dist_matrix, z, x, FALSE, f) .External(proxy:::R_apply_dist_matrix, z, z, FALSE, f) .External(proxy:::R_apply_dist_matrix, z, NULL, FALSE, f) ## list x <- unlist(apply(x, 1, list), recursive = FALSE) x y <- unlist(apply(y, 1, list), recursive = FALSE) .External(proxy:::R_apply_dist_list, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_list, x, x, FALSE, f) .External(proxy:::R_apply_dist_list, x, y, FALSE, f) .External(proxy:::R_apply_dist_list, x, y, TRUE, f) ## logical matrix f <- function(a, b, c, d, n) a / sqrt(a+b) / sqrt(a+c) x <- t(sapply(x, ">", 0.5)) x y <- t(sapply(y, ">", 0.5)) .External(proxy:::R_apply_dist_binary_matrix, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_binary_matrix, x, x, FALSE, f) .External(proxy:::R_apply_dist_binary_matrix, x, y, FALSE, f) .External(proxy:::R_apply_dist_binary_matrix, x, y, TRUE, f) ## data.frame f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) x <- data.frame(unlist(apply(x, 2, list), recursive = FALSE)) names(x) <- letters[1:4] x y <- data.frame(unlist(apply(y, 2, list), recursive = FALSE)) names(y) <- letters[1:4] .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y, TRUE, f) # f <- function(x, y) { if (rownames(x) == 1 && rownames(y) == 1) { print(x) str(x) print(y) } sum(x == y) / length(x) } x <- data.frame(1:5, LETTERS[1:5], stringsAsFactors = FALSE) x y <- data.frame(1:6, LETTERS[c(1,1:5)], row.names = letters[1:6], stringsAsFactors = FALSE) y all.equal(x, y) identical(attributes(x[[1]]), attributes(y[[1]])) identical(attributes(x[[2]]), attributes(y[[2]])) .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) # test parameters f <- function(x, y, p = 1) sum(x == y) / length(x) * p .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f, p = 2) ### proxy/tests/apply.Rout.save0000644000176200001440000002633413651075270015550 0ustar liggesusers R version 3.6.3 (2020-02-29) -- "Holding the Windsock" Copyright (C) 2020 The R Foundation for Statistical Computing Platform: x86_64-pc-linux-gnu (64-bit) R is free software and comes with ABSOLUTELY NO WARRANTY. You are welcome to redistribute it under certain conditions. Type 'license()' or 'licence()' for distribution details. R is a collaborative project with many contributors. Type 'contributors()' for more information and 'citation()' on how to cite R or R packages in publications. Type 'demo()' for some demos, 'help()' for on-line help, or 'help.start()' for an HTML browser interface to help. Type 'q()' to quit R. > > ## tests on apply C wrappers > > library(proxy) Attaching package: 'proxy' The following objects are masked from 'package:stats': as.dist, dist The following object is masked from 'package:base': as.matrix > > set.seed(20070630) > > ## matrix > > f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) > > x <- matrix(runif(20), 5, 4) > x [,1] [,2] [,3] [,4] [1,] 0.4595829 0.246283120 0.6662151 0.2658000 [2,] 0.8390691 0.002929016 0.3449861 0.4751930 [3,] 0.0377036 0.300615279 0.1399626 0.5143060 [4,] 0.7621676 0.963274350 0.4358104 0.7934745 [5,] 0.1211588 0.452265898 0.8005272 0.3062947 > y <- matrix(runif(20), 5, 4) > y [,1] [,2] [,3] [,4] [1,] 0.3435311 0.45003283 0.1931588 0.22999466 [2,] 0.4417228 0.78191550 0.9028450 0.01728483 [3,] 0.3872093 0.45154308 0.7324979 0.52502253 [4,] 0.9119128 0.21720445 0.3986998 0.72623146 [5,] 0.9374529 0.04343485 0.8170873 0.69974714 > > .External(proxy:::R_apply_dist_matrix, x, NULL, FALSE, f) 1 2 3 4 2 0.8175510 3 0.5909955 0.5179092 4 0.8046383 0.7485219 0.8416675 5 0.9026897 0.5245741 0.6848730 0.7514756 > .External(proxy:::R_apply_dist_matrix, x, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1.0000000 0.8175510 0.5909955 0.8046383 0.9026897 [2,] 0.8175510 1.0000000 0.5179092 0.7485219 0.5245741 [3,] 0.5909955 0.5179092 1.0000000 0.8416675 0.6848730 [4,] 0.8046383 0.7485219 0.8416675 1.0000000 0.7514756 [5,] 0.9026897 0.5245741 0.6848730 0.7514756 1.0000000 > .External(proxy:::R_apply_dist_matrix, x, y, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8068091 0.8869376 0.9573450 0.8394141 0.9257044 [2,] 0.7091915 0.5310191 0.7490815 0.9743857 0.9583243 [3,] 0.7471004 0.4955903 0.7898068 0.6896626 0.5971429 [4,] 0.9837110 0.7703657 0.8896691 0.8665744 0.7655715 [5,] 0.7512326 0.9126553 0.9466108 0.6140860 0.7184058 > .External(proxy:::R_apply_dist_matrix, x, y, TRUE, f) [1] 0.8068091 0.5310191 0.7898068 0.8665744 0.7184058 > > # coerce > > z <- y * 100 > storage.mode(z) <- "integer" > .External(proxy:::R_apply_dist_matrix, x, z, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8035753 0.8851147 0.9568709 0.8363210 0.9255173 [2,] 0.7027894 0.5281041 0.7457478 0.9754800 0.9584822 [3,] 0.7399077 0.4909526 0.7896421 0.6859131 0.5945390 [4,] 0.9811939 0.7674593 0.8884016 0.8638128 0.7638742 [5,] 0.7490644 0.9109332 0.9482222 0.6083438 0.7173810 > .External(proxy:::R_apply_dist_matrix, z, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8035753 0.7027894 0.7399077 0.9811939 0.7490644 [2,] 0.8851147 0.5281041 0.4909526 0.7674593 0.9109332 [3,] 0.9568709 0.7457478 0.7896421 0.8884016 0.9482222 [4,] 0.8363210 0.9754800 0.6859131 0.8638128 0.6083438 [5,] 0.9255173 0.9584822 0.5945390 0.7638742 0.7173810 > .External(proxy:::R_apply_dist_matrix, z, z, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1.0000000 0.8363694 0.8593771 0.8075186 0.7133480 [2,] 0.8363694 1.0000000 0.8668551 0.5849013 0.6553123 [3,] 0.8593771 0.8668551 1.0000000 0.8253313 0.8715788 [4,] 0.8075186 0.5849013 0.8253313 1.0000000 0.9495662 [5,] 0.7133480 0.6553123 0.8715788 0.9495662 1.0000000 > .External(proxy:::R_apply_dist_matrix, z, NULL, FALSE, f) 1 2 3 4 2 0.8363694 3 0.8593771 0.8668551 4 0.8075186 0.5849013 0.8253313 5 0.7133480 0.6553123 0.8715788 0.9495662 > > ## list > > x <- unlist(apply(x, 1, list), recursive = FALSE) > x [[1]] [1] 0.4595829 0.2462831 0.6662151 0.2658000 [[2]] [1] 0.839069104 0.002929016 0.344986119 0.475192965 [[3]] [1] 0.0377036 0.3006153 0.1399626 0.5143060 [[4]] [1] 0.7621676 0.9632744 0.4358104 0.7934745 [[5]] [1] 0.1211588 0.4522659 0.8005272 0.3062947 > y <- unlist(apply(y, 1, list), recursive = FALSE) > > .External(proxy:::R_apply_dist_list, x, NULL, FALSE, f) 1 2 3 4 2 0.8175510 3 0.5909955 0.5179092 4 0.8046383 0.7485219 0.8416675 5 0.9026897 0.5245741 0.6848730 0.7514756 > .External(proxy:::R_apply_dist_list, x, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1.0000000 0.8175510 0.5909955 0.8046383 0.9026897 [2,] 0.8175510 1.0000000 0.5179092 0.7485219 0.5245741 [3,] 0.5909955 0.5179092 1.0000000 0.8416675 0.6848730 [4,] 0.8046383 0.7485219 0.8416675 1.0000000 0.7514756 [5,] 0.9026897 0.5245741 0.6848730 0.7514756 1.0000000 > .External(proxy:::R_apply_dist_list, x, y, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 0.8068091 0.8869376 0.9573450 0.8394141 0.9257044 [2,] 0.7091915 0.5310191 0.7490815 0.9743857 0.9583243 [3,] 0.7471004 0.4955903 0.7898068 0.6896626 0.5971429 [4,] 0.9837110 0.7703657 0.8896691 0.8665744 0.7655715 [5,] 0.7512326 0.9126553 0.9466108 0.6140860 0.7184058 > .External(proxy:::R_apply_dist_list, x, y, TRUE, f) [1] 0.8068091 0.5310191 0.7898068 0.8665744 0.7184058 > > ## logical matrix > > f <- function(a, b, c, d, n) + a / sqrt(a+b) / sqrt(a+c) > > x <- t(sapply(x, ">", 0.5)) > x [,1] [,2] [,3] [,4] [1,] FALSE FALSE TRUE FALSE [2,] TRUE FALSE FALSE FALSE [3,] FALSE FALSE FALSE TRUE [4,] TRUE TRUE FALSE TRUE [5,] FALSE FALSE TRUE FALSE > y <- t(sapply(y, ">", 0.5)) > > .External(proxy:::R_apply_dist_binary_matrix, x, NULL, FALSE, f) 1 2 3 4 2 0.0000000 3 0.0000000 0.0000000 4 0.0000000 0.5773503 0.5773503 5 1.0000000 0.0000000 0.0000000 0.0000000 > .External(proxy:::R_apply_dist_binary_matrix, x, x, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] 1 0.0000000 0.0000000 0.0000000 1 [2,] 0 1.0000000 0.0000000 0.5773503 0 [3,] 0 0.0000000 1.0000000 0.5773503 0 [4,] 0 0.5773503 0.5773503 1.0000000 0 [5,] 1 0.0000000 0.0000000 0.0000000 1 > .External(proxy:::R_apply_dist_binary_matrix, x, y, FALSE, f) [,1] [,2] [,3] [,4] [,5] [1,] NaN 0.7071068 0.7071068 0.0000000 0.5773503 [2,] NaN 0.0000000 0.0000000 0.7071068 0.5773503 [3,] NaN 0.0000000 0.7071068 0.7071068 0.5773503 [4,] NaN 0.4082483 0.4082483 0.8164966 0.6666667 [5,] NaN 0.7071068 0.7071068 0.0000000 0.5773503 > .External(proxy:::R_apply_dist_binary_matrix, x, y, TRUE, f) [1] NaN 0.0000000 0.7071068 0.8164966 0.5773503 > > ## data.frame > > f <- function(x, y) sum(x*y) / sqrt(sum(x*x)) / sqrt(sum(y*y)) > > x <- data.frame(unlist(apply(x, 2, list), recursive = FALSE)) > names(x) <- letters[1:4] > x a b c d 1 FALSE FALSE TRUE FALSE 2 TRUE FALSE FALSE FALSE 3 FALSE FALSE FALSE TRUE 4 TRUE TRUE FALSE TRUE 5 FALSE FALSE TRUE FALSE > y <- data.frame(unlist(apply(y, 2, list), recursive = FALSE)) > names(y) <- letters[1:4] > > .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) 1 2 3 4 2 0.0000000 3 0.0000000 0.0000000 4 0.0000000 0.5773503 0.5773503 5 1.0000000 0.0000000 0.0000000 0.0000000 > .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) 1 2 3 4 5 1 1 0.0000000 0.0000000 0.0000000 1 2 0 1.0000000 0.0000000 0.5773503 0 3 0 0.0000000 1.0000000 0.5773503 0 4 0 0.5773503 0.5773503 1.0000000 0 5 1 0.0000000 0.0000000 0.0000000 1 > .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) 1 2 3 4 5 1 NaN 0.7071068 0.7071068 0.0000000 0.5773503 2 NaN 0.0000000 0.0000000 0.7071068 0.5773503 3 NaN 0.0000000 0.7071068 0.7071068 0.5773503 4 NaN 0.4082483 0.4082483 0.8164966 0.6666667 5 NaN 0.7071068 0.7071068 0.0000000 0.5773503 > .External(proxy:::R_apply_dist_data_frame, x, y, TRUE, f) [1] NaN 0.0000000 0.7071068 0.8164966 0.5773503 > > # > > f <- function(x, y) { + if (rownames(x) == 1 && rownames(y) == 1) { + print(x) + str(x) + print(y) + } + sum(x == y) / length(x) + } > > x <- data.frame(1:5, LETTERS[1:5], stringsAsFactors = FALSE) > x X1.5 LETTERS.1.5. 1 1 A 2 2 B 3 3 C 4 4 D 5 5 E > > y <- data.frame(1:6, LETTERS[c(1,1:5)], + row.names = letters[1:6], + stringsAsFactors = FALSE) > y X1.6 LETTERS.c.1..1.5.. a 1 A b 2 A c 3 B d 4 C e 5 D f 6 E > > all.equal(x, y) [1] "Names: 2 string mismatches" [2] "Attributes: < Component \"row.names\": Modes: numeric, character >" [3] "Attributes: < Component \"row.names\": Lengths: 5, 6 >" [4] "Attributes: < Component \"row.names\": target is numeric, current is character >" [5] "Component 1: Numeric: lengths (5, 6) differ" [6] "Component 2: Lengths (5, 6) differ (string compare on first 5)" [7] "Component 2: 4 string mismatches" > identical(attributes(x[[1]]), attributes(y[[1]])) [1] TRUE > identical(attributes(x[[2]]), attributes(y[[2]])) [1] TRUE > > .External(proxy:::R_apply_dist_data_frame, x, NULL, FALSE, f) 1 2 3 4 2 0 3 0 0 4 0 0 0 5 0 0 0 0 > .External(proxy:::R_apply_dist_data_frame, x, x, FALSE, f) X1.5 LETTERS.1.5. 1 1 A 'data.frame': 1 obs. of 2 variables: $ X1.5 : int 1 $ LETTERS.1.5.: chr "A" X1.5 LETTERS.1.5. 1 1 A 1 2 3 4 5 1 1 0 0 0 0 2 0 1 0 0 0 3 0 0 1 0 0 4 0 0 0 1 0 5 0 0 0 0 1 > .External(proxy:::R_apply_dist_data_frame, x, y, FALSE, f) X1.5 LETTERS.1.5. 1 1 A 'data.frame': 1 obs. of 2 variables: $ X1.5 : int 1 $ LETTERS.1.5.: chr "A" X1.5 LETTERS.1.5. 1 1 A a b c d e f 1 1 0.5 0.0 0.0 0.0 0.0 2 0 0.5 0.5 0.0 0.0 0.0 3 0 0.0 0.5 0.5 0.0 0.0 4 0 0.0 0.0 0.5 0.5 0.0 5 0 0.0 0.0 0.0 0.5 0.5 > .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) X1.5 LETTERS.1.5. 1 1 A 'data.frame': 1 obs. of 2 variables: $ X1.5 : int 1 $ LETTERS.1.5.: chr "A" X1.5 LETTERS.1.5. 1 1 A [1] 1.0 0.5 0.5 0.5 0.5 > > > # test parameters > > f <- function(x, y, p = 1) + sum(x == y) / length(x) * p > > .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f) [1] 1.0 0.5 0.5 0.5 0.5 > .External(proxy:::R_apply_dist_data_frame, x, y[-6,], TRUE, f, p = 2) [1] 2 1 1 1 1 > > ### > > proc.time() user system elapsed 0.310 0.019 0.317 proxy/src/0000755000176200001440000000000014057153366012234 5ustar liggesusersproxy/src/util.c0000644000176200001440000001041713223372641013351 0ustar liggesusers#include #include // arrayIndex.c extern SEXP _int_array_subscript(int, SEXP, const char *, const char *, SEXP, Rboolean, SEXP); // subset a dist object. in order to preserve symmetry // we allow only one subset index. // // notes: (1) by definition we return a zero-length vector // for subscripts of length less than two. (2) coercing to // real is convenient but slightly inefficient. // // ceeboo 2006, 2007 SEXP R_subset_dist(SEXP R_x, SEXP s) { if (!inherits(R_x, "dist")) error("'x' not of class dist"); int i, j, k, si, sj, nx, ns; SEXP x = R_x, r, d; nx = 1 + (int) sqrt(2*LENGTH(x)); if (LENGTH(x) != nx*(nx-1)/2) error("'x' invalid length"); if (TYPEOF(x) != REALSXP) PROTECT(x = coerceVector(R_x, REALSXP)); PROTECT(r = allocArray(INTSXP, PROTECT(ScalarInteger(0)))); UNPROTECT(1); INTEGER(getAttrib(r, R_DimSymbol))[0] = nx; d = getAttrib(x, install("Labels")); if (!isNull(d)) { SEXP t; if (TYPEOF(d) != STRSXP) error("'Labels' not of type character"); if (LENGTH(d) != nx) error("'Labels' invalid length"); setAttrib(r, R_DimNamesSymbol, PROTECT(t = allocVector(VECSXP, 1))); UNPROTECT(1); SET_VECTOR_ELT(t, 0, d); } #ifdef _COMPAT_ PROTECT(s = arraySubscript(0, s, GET_DIM(r), getAttrib, (STRING_ELT), r)); #else PROTECT(s = _int_array_subscript(0, s, "dim", "dimnames", r, TRUE, R_NilValue)); #endif ns = LENGTH(s); for (k = 0; k < ns; k++) if (INTEGER(s)[k] == NA_INTEGER) error("'s' invalid subscript(s)"); else INTEGER(s)[k]--; PROTECT(r = allocVector(REALSXP, ns*(ns-1)/2)); k = 0; for (i = 0; i < ns-1; i++) { si = INTEGER(s)[i]; for (j = i+1; j < ns; j++) { sj = INTEGER(s)[j]; if (si == sj) REAL(r)[k++] = NA_REAL; else REAL(r)[k++] = (si > sj) ? REAL(x)[si+sj*(nx-1)-sj*(sj+1)/2-1] : REAL(x)[sj+si*(nx-1)-si*(si+1)/2-1]; } R_CheckUserInterrupt(); } if (x == R_x) copyMostAttrib(R_x, r); setAttrib(r, install("Size"), PROTECT(ScalarInteger(ns))); UNPROTECT(1); if (!isNull(d)) { SEXP t; setAttrib(r, install("Labels"), PROTECT(t = allocVector(STRSXP, ns))); UNPROTECT(1); for (k = 0; k < ns; k++) SET_STRING_ELT(t, k, STRING_ELT(d, INTEGER(s)[k])); } UNPROTECT(3); if (x != R_x) UNPROTECT(1); return r; } // compute the rowSums for an R dist object. due to // symmetry this is equivalent to colSums. rowMeans // are not implemented as these can be easily obtained // from the values of rowSums. // // na_rm implements the usual meaning of omitting NA // and NaN values, where the sum of the empty set is // zero. // // ceeboo 2006, 2007 SEXP R_rowSums_dist(SEXP R_x, SEXP na_rm) { if (!inherits(R_x, "dist")) error("'x' not of class dist"); if (isNull(na_rm) || TYPEOF(na_rm) != LGLSXP) error("'na.rm' not of type logical"); int i, j, k, n; SEXP x = R_x, r; n = 1 + (int) sqrt(2*LENGTH(x)); if (LENGTH(x) != n*(n-1)/2) error("'x' invalid length"); if (TYPEOF(x) != REALSXP) PROTECT(x = coerceVector(R_x, REALSXP)); PROTECT(r = allocVector(REALSXP, n)); memset(REAL(r), 0, sizeof(double)*n); k = 0; for (i = 0; i < n-1; i++) { for (j = i+1; j < n; j++) { double z = REAL(x)[k++]; if (!R_FINITE(z)) { if (ISNAN(z)) { if (LOGICAL(na_rm)[0] == TRUE) continue; REAL(r)[i] = REAL(r)[j] = (ISNA(z)) ? NA_REAL : R_NaN; } else REAL(r)[i] = REAL(r)[j] = z; break; } REAL(r)[i] += z; REAL(r)[j] += z; } R_CheckUserInterrupt(); } setAttrib(r, R_NamesSymbol, getAttrib(x, install("Labels"))); UNPROTECT(1); if (x != R_x) UNPROTECT(1); return r; } // produce row or column indexes SEXP R_row_dist(SEXP x, SEXP col) { if (!inherits(x, "dist")) error("'x' not of class dist"); if (isNull(col) || TYPEOF(col) != LGLSXP) error("'col' not of type logical"); int i, j, n, nx; SEXP r; nx = 1 + (int) sqrt(2*LENGTH(x)); if (LENGTH(x) != nx*(nx-1)/2) error("'x' invalid length"); PROTECT(r = allocVector(INTSXP, LENGTH(x))); n = 0; for (j = 1; j < nx; j++) for (i = j+1; i < nx+1; i++) INTEGER(r)[n++] = (*LOGICAL(col)) ? j : i; UNPROTECT(1); return r; } // proxy/src/apply.c0000644000176200001440000003434213437262301013522 0ustar liggesusers#include #include // wrapper functions for distance computation with // user-supplied functions given common data types, // such as matrix, list, and data.frames. // // note that the code is prone to breaking with new // releases of R. therefore, always check against the // reference examples. /* compute auto- or cross-distances with a user-supplied * function given matrix data. * * ceeboo 2006, 2007 */ SEXP R_apply_dist_matrix(SEXP p) { int i, j, k, l, n, nx, ny, nz, m = 0; SEXP r, c, tx, ty; SEXP R_x, x, R_y, y, R_d, f; p = CDR(p); if (length(p) < 4) error("invalid number of arguments"); R_x = x = CAR(p); R_y = y = CADR(p); if (!isMatrix(x) || (!isNull(y) && !isMatrix(y))) error("invalid data parameter(s)"); p = CDDR(p); R_d = CAR(p); if (TYPEOF(R_d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); if (isNull(y)) y = x; else if (LOGICAL(R_d)[0] == TRUE) m = 2; else m = 1; if ((n = INTEGER(GET_DIM(x))[1]) != INTEGER(GET_DIM(y))[1]) error("the number of columns of the data matrixes do not conform"); nz = nx = INTEGER(GET_DIM(x))[0]; ny = INTEGER(GET_DIM(y))[0]; if (m == 2 && nx != ny) error("the number of rows of the data matrixes do not conform"); if (TYPEOF(x) != REALSXP) { PROTECT(x = coerceVector(R_x, REALSXP)); if (isNull(R_y) || R_x == R_y) y = x; } if (TYPEOF(y) != REALSXP) PROTECT(y = coerceVector(R_y, REALSXP)); if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); if (!isNull(d = getAttrib(x, R_DimNamesSymbol))) setAttrib(r, install("Labels"), VECTOR_ELT(d, 0)); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); d1 = getAttrib(x, R_DimNamesSymbol); d2 = getAttrib(y, R_DimNamesSymbol); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, isNull(d1) ? d1 : VECTOR_ELT(d1, 0)); SET_VECTOR_ELT(d, 1, isNull(d2) ? d2 : VECTOR_ELT(d2, 0)); } } else PROTECT(r = allocVector(REALSXP, nx)); PROTECT(tx = allocVector(REALSXP, n)); PROTECT(ty = allocVector(REALSXP, n)); PROTECT(c = LCONS(f, CONS(tx, CONS(ty, p)))); l = 0; for (j = 0; j < ny; j++) { for (k = 0; k < n; k++) REAL(ty)[k] = REAL(y)[j+k*ny]; if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { for (k = 0; k < n; k++) REAL(tx)[k] = REAL(x)[i+k*nx]; SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(4); if (x != R_x) UNPROTECT(1); if (!isNull(R_y) && R_y != R_x && y != R_y) UNPROTECT(1); return r; } /* compute auto- or cross-distances with a user-supplied * function given list data. * * ceeboo 2006, 2007 */ SEXP R_apply_dist_list(SEXP p) { int i, j, l, nx, ny, nz, m = 0; SEXP r, c, d, tx, ty; SEXP x, y, f; p = CDR(p); if (length(p) < 4) error("invalid number of arguments"); x = CAR(p); y = CADR(p); if (TYPEOF(x) != VECSXP || (!isNull(y) && TYPEOF(y) != VECSXP)) error("invalid data parameter(s)"); p = CDDR(p); d = CAR(p); if (TYPEOF(d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); if (isNull(y)) y = x; else if (LOGICAL(d)[0] == TRUE) m = 2; else m = 1; nz = nx = LENGTH(x); ny = LENGTH(y); if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); PROTECT(d = getAttrib(x, R_NamesSymbol)); if (!isNull(d)) setAttrib(r, install("Labels"), d); UNPROTECT(1); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); PROTECT(d1 = getAttrib(x, R_NamesSymbol)); PROTECT(d2 = getAttrib(y, R_NamesSymbol)); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, d1); SET_VECTOR_ELT(d, 1, d2); } UNPROTECT(2); } else { if (nx != ny) error("the number of components of 'x' and 'y' does not conform"); PROTECT(r = allocVector(REALSXP, nx)); } PROTECT(ty = CONS(R_NilValue, p)); PROTECT(tx = CONS(R_NilValue, ty)); PROTECT(c = LCONS(f, tx)); l = 0; for (j = 0; j < ny; j++) { SETCAR(ty, VECTOR_ELT(y, j)); if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { SETCAR(tx, VECTOR_ELT(x, i)); SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(4); return r; } /* compute binary auto- or cross-distances with a user-supplied * function given logical matrix data, and by precomputing the * number of concordant and discordant pairs. * * dm 2007 */ SEXP R_apply_dist_binary_matrix(SEXP p) { int i, j, k, l, n, nx, ny, nz, m = 0; int i0, j0; SEXP r, c, d, ta, tb, tc, td, tn; SEXP x, y, f; p = CDR(p); if (length(p) < 3) error("invalid number of arguments"); x = CAR(p); y = CADR(p); if (!isMatrix(x) || TYPEOF(x) != LGLSXP || (!isNull(y) && (!isMatrix(y) || TYPEOF(x) != LGLSXP))) error("invalid data parameter(s)"); p = CDDR(p); d = CAR(p); if (TYPEOF(d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); if (isNull(y)) y = x; else if (LOGICAL(d)[0] == TRUE) m = 2; else m = 1; if ((n = INTEGER(GET_DIM(x))[1]) != INTEGER(GET_DIM(y))[1]) error("data parameters do not conform"); nz = nx = INTEGER(GET_DIM(x))[0]; ny = INTEGER(GET_DIM(y))[0]; if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); if (!isNull(d = getAttrib(x, R_DimNamesSymbol))) setAttrib(r, install("Labels"), VECTOR_ELT(d, 0)); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); d1 = getAttrib(x, R_DimNamesSymbol); d2 = getAttrib(y, R_DimNamesSymbol); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, isNull(d1) ? d1 : VECTOR_ELT(d1, 0)); SET_VECTOR_ELT(d, 1, isNull(d2) ? d2 : VECTOR_ELT(d2, 0)); } } else { if (nx != ny) error("the number of rows of 'x' and 'y' does not conform"); PROTECT(r = allocVector(REALSXP, nx)); } PROTECT(ta = allocVector(INTSXP, 1)); PROTECT(tb = allocVector(INTSXP, 1)); PROTECT(tc = allocVector(INTSXP, 1)); PROTECT(td = allocVector(INTSXP, 1)); PROTECT(tn = allocVector(INTSXP, 1)); PROTECT(c = LCONS(f, CONS(ta, CONS(tb, CONS(tc, CONS(td, CONS(tn, p)))) ))); l = 0; for (j = 0; j < ny; j++) { if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { INTEGER(ta)[0] = INTEGER(tb)[0] = INTEGER(tc)[0] = INTEGER(tn)[0] = 0; for (k = 0; k < n; k++) { i0 = LOGICAL(x)[i + k * nx]; j0 = LOGICAL(y)[j + k * ny]; if (i0 == NA_LOGICAL || j0 == NA_LOGICAL) continue; INTEGER(ta)[0] += (i0 == TRUE && j0 == TRUE); INTEGER(tb)[0] += (i0 == TRUE && j0 == FALSE); INTEGER(tc)[0] += (i0 == FALSE && j0 == TRUE); INTEGER(tn)[0]++; } if (INTEGER(tn)[0] == 0) INTEGER(td)[0] = 0; else INTEGER(td)[0] = INTEGER(tn)[0] - INTEGER(ta)[0] - INTEGER(tb)[0] - INTEGER(tc)[0]; SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(7); return r; } /* compute auto- or cross-distances with a user-supplied * function given data.frame data. * * because of the details this is insane ... * * ceeboo 2007 */ static void setElement(SEXP x, int i, SEXP y) { switch (TYPEOF(x)) { case LGLSXP: LOGICAL(x)[0] = LOGICAL(y)[i]; break; case INTSXP: INTEGER(x)[0] = INTEGER(y)[i]; break; case REALSXP: REAL(x)[0] = REAL(y)[i]; break; case STRSXP: SET_STRING_ELT(x, 0, STRING_ELT(y, i)); break; case VECSXP: SET_VECTOR_ELT(x, 0, VECTOR_ELT(y, i)); break; default: error("type not implemented"); } } SEXP R_apply_dist_data_frame(SEXP p) { int i, j, k, l, nc, nx, ny, nz, m = 0; SEXP r, c, d, tx, ty, rx, ry; SEXP x, y, f; p = CDR(p); if (length(p) < 4) error("invalid number of arguments"); x = CAR(p); y = CADR(p); if (!inherits(x, "data.frame") || (!isNull(y) && !inherits(y, "data.frame"))) error("invalid data parameter(s)"); p = CDDR(p); d = CAR(p); if (TYPEOF(d) != LGLSXP) error("invalid option parameter"); p = CDR(p); f = CAR(p); if (!isFunction(f)) error("invalid function parameter"); p = CDR(p); nc = LENGTH(x); if (nc == 0) error("cannot handle empty data frames"); nx = ny = nz = LENGTH(VECTOR_ELT(x, 0)); if (isNull(y)) y = x; else { if (LENGTH(y) != nc) error("data parameters do not conform"); ny = LENGTH(VECTOR_ELT(y, 0)); for (k = 0; k < nc; k++) { if (TYPEOF(VECTOR_ELT(x, k)) != TYPEOF(VECTOR_ELT(y, k))) error("data parameters do not conform"); // sucks: the c code in identical.c is not // accessible. c = eval(PROTECT(LCONS(install("identical"), PROTECT( CONS(ATTRIB(VECTOR_ELT(x, k)), CONS(ATTRIB(VECTOR_ELT(y, k)), R_NilValue))))), R_GlobalEnv); UNPROTECT(2); if (LOGICAL(c)[0] == FALSE) error("attributes of data parameters do not conform"); } if (LOGICAL(d)[0] == TRUE) { if (nx != ny) error("the number of rows of 'x' and 'y' do not conform"); m = 2; } else m = 1; } // fixme: row.names if (m == 0) { PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); setAttrib(r, install("Labels"), PROTECT(coerceVector(PROTECT(getAttrib(x, install("row.names"))), STRSXP))); UNPROTECT(2); setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d; PROTECT(r = allocMatrix(REALSXP, nx, ny)); setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, coerceVector(PROTECT(getAttrib(x, install("row.names"))), STRSXP)); UNPROTECT(1); SET_VECTOR_ELT(d, 1, coerceVector(PROTECT(getAttrib(y, install("row.names"))), STRSXP)); UNPROTECT(1); } else PROTECT(r = allocVector(REALSXP, nx)); PROTECT(tx = allocVector(VECSXP, nc)); setAttrib(tx, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); setAttrib(tx, install("row.names"), PROTECT(rx = allocVector(INTSXP, 1))); UNPROTECT(1); setAttrib(tx, R_ClassSymbol, getAttrib(x, R_ClassSymbol)); PROTECT(ty = allocVector(VECSXP, nc)); setAttrib(ty, R_NamesSymbol, getAttrib(x, R_NamesSymbol)); setAttrib(ty, install("row.names"), PROTECT(ry = allocVector(INTSXP, 1))); UNPROTECT(1); setAttrib(ty, R_ClassSymbol, getAttrib(x, R_ClassSymbol)); for (k = 0; k < nc; k++) { SEXP t, s; t = VECTOR_ELT(x, k); // fixme: should fail for S4 SET_VECTOR_ELT(tx, k, (s = allocVector(TYPEOF(t), 1))); SET_ATTRIB(s, ATTRIB(t)); // fixme: may be wrong SET_OBJECT(s, OBJECT(t)); SET_VECTOR_ELT(ty, k, (s = allocVector(TYPEOF(t), 1))); SET_ATTRIB(s, ATTRIB(t)); SET_OBJECT(s, OBJECT(t)); } PROTECT(c = LCONS(f, CONS(tx, CONS(ty, p)))); l = 0; for (j = 0; j < ny; j++) { for (k = 0; k < nc; k++) setElement(VECTOR_ELT(ty, k), j, VECTOR_ELT(y, k)); INTEGER(ry)[0] = j+1; // R index if (m == 0) i = j+1; else if (m == 1) i = 0; else { i = j; nz = j+1; } for (; i < nz; i++) { for (k = 0; k < nc; k++) setElement(VECTOR_ELT(tx, k), i, VECTOR_ELT(x, k)); INTEGER(rx)[0] = i+1; SEXP s = eval(c, R_GlobalEnv); if (LENGTH(s) != 1) error("not a scalar return value"); // fixme: warning? if (TYPEOF(s) != REALSXP) { REAL(r)[l++] = REAL(coerceVector(PROTECT(s), REALSXP))[0]; UNPROTECT(1); } else REAL(r)[l++] = REAL(s)[0]; } R_CheckUserInterrupt(); } UNPROTECT(4); return r; } // proxy/src/distance.c0000644000176200001440000002513413222626722014171 0ustar liggesusers#include #include #include // extends the code from stats/src/distance.c // to handle auto- and cross-distances. // // note: the runtime in the symmetric case is // not always optimal. // // ceeboo 2007, 2008, 2009, 2014, 2016 #define both_non_NA(a,b) (!ISNAN(a) && !ISNAN(b)) #define both_FINITE(a,b) (R_FINITE(a) && R_FINITE(b)) typedef double (* DFUN)(double *, double *, int, int, int); static double dfp = 1; static double minkowski(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = (*x - *y); if (!ISNAN(dev)) { dist += R_pow(fabs(dev), dfp); count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return R_pow(dist, 1.0/dfp); } static double euclidean(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = (*x - *y); if (!ISNAN(dev)) { dist += dev * dev; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return sqrt(dist); } static double maximum(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = -DBL_MAX; for (j = 0 ; j < nc ; j++) { if (both_non_NA(*x, *y)) { dev = fabs(*x - *y); if (!ISNAN(dev)) { if (dev > dist) dist = dev; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; //if (count != nc) dist /= ((double)count/nc); return dist; } static double manhattan(double *x, double *y, int nx, int ny, int nc) { double dev, dist; int count, j; count = 0; dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = fabs(*x - *y); if (!ISNAN(dev)) { dist += dev; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return dist; } // fixme: NA for two all-zero vectors static double canberra(double *x, double *y, int nx, int ny, int nc) { double dev, dist, sum, diff; int count, j; count = 0; dist = 0; for (j = 0 ;j < nc; j++) { if (both_non_NA(*x, *y)) { sum = fabs(*x + *y); diff = fabs(*x - *y); if (sum > DBL_MIN || diff > DBL_MIN) { dev = diff/sum; if (!ISNAN(dev) || (!R_FINITE(diff) && diff == sum && /* use Inf = lim x -> oo */ (dev = 1.))) { dist += dev; count++; } } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (count != nc) dist /= ((double)count/nc); return dist; } // FIXME why treat not both finite as NA? static double binary(double *x, double *y, int nx, int ny, int nc) { int total, count, dist; int j; total = count = dist = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { if (*x || *y) { count++; if (!(*x && *y)) dist++; } total++; } x += nx; y += ny; } if (total == 0) return NA_REAL; if (count == 0) return 0; return (double) dist / count; } static double matching(double *x, double *y, int nx, int ny, int nc) { int total, count; int j; total = count = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { if (*x != *y) count++; total++; } x += nx; y += ny; } if (total == 0) return NA_REAL; if (count == 0) return 0; return (double) count / total; } static double fuzzy(double *x, double *y, int nx, int ny, int nc) { double dist, smax, smin; int count, j; count = 0; smax = smin = 0; for (j = 0; j < nc; j++) { if (both_FINITE(*x, *y)) { if (*x > *y) { smax += *x; smin += *y; } else { smax += *y; smin += *x; } count++; } x += nx; y += ny; } if (count == 0) return NA_REAL; if (!R_FINITE(smin)) return NA_REAL; dist = smin / smax; if (ISNAN(dist)) return 0; return 1-dist; } static double mutual(double *x, double *y, int nx, int ny, int nc) { double dist; int total, count, cx, cy, j; total = count = cx = cy = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { if (*x && *y) count++; cx += (*x && 1); cy += (*y && 1); total++; } x += nx; y += ny; } if (total == 0) return NA_REAL; if (cx == 0 || cy == 0 || cx == total || cy == total) return 0; dist = 0; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); cy = total - cy; count = cx - count; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); cx = total - cx; count = cy - count; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); cy = total - cy; count = cx - count; if (count > 0) dist += (double) count / total * log((double) count / cx / cy * total); if (total != nc) dist /= ((double)total/nc); return dist ; } // wrapper static SEXP dists(SEXP R_x, SEXP R_y, SEXP R_d, DFUN f, SEXP R_p) { if (!isMatrix(R_x)) error("'x' not of class matrix"); if (!isNull(R_y) && !isMatrix(R_y)) error("'y' not of class matrix"); if (TYPEOF(R_d) != LGLSXP) error("'d' not of type logical"); int i, j, n, nx, ny, nc, m = 0; SEXP x = R_x, y = R_y, r; if (!isNull(R_p)) // fixme: check? dfp = *REAL(R_p); if (isNull(y)) y = x; else if (LOGICAL(R_d)[0] == TRUE) m = 2; else m = 1; // return matrix nc = INTEGER(GET_DIM(x))[1]; if (INTEGER(GET_DIM(y))[1] != nc) error("invalid number of columns"); nx = INTEGER(GET_DIM(x))[0]; ny = INTEGER(GET_DIM(y))[0]; if (m == 2 && nx != ny) error("invalid number of rows for pairwise mode"); if (TYPEOF(x) != REALSXP) { PROTECT(x = coerceVector(R_x, REALSXP)); if (isNull(R_y) || R_x == R_y) y = x; } if (TYPEOF(y) != REALSXP) PROTECT(y = coerceVector(R_y, REALSXP)); if (m == 0) { SEXP d; PROTECT(r = allocVector(REALSXP, nx*(nx-1)/2)); setAttrib(r, install("Size"), PROTECT(ScalarInteger(nx))); UNPROTECT(1); if (!isNull(d = getAttrib(x, R_DimNamesSymbol))) setAttrib(r, install("Labels"), VECTOR_ELT(d, 0)); // fixme: package? setAttrib(r, R_ClassSymbol, PROTECT(mkString("dist"))); UNPROTECT(1); } else if (m == 1) { SEXP d1, d2; PROTECT(r = allocMatrix(REALSXP, nx, ny)); d1 = getAttrib(x, R_DimNamesSymbol); d2 = getAttrib(y, R_DimNamesSymbol); if (!isNull(d1) || !isNull(d2)) { SEXP d; setAttrib(r, R_DimNamesSymbol, PROTECT(d = allocVector(VECSXP, 2))); UNPROTECT(1); SET_VECTOR_ELT(d, 0, isNull(d1) ? d1 : VECTOR_ELT(d1, 0)); SET_VECTOR_ELT(d, 1, isNull(d2) ? d2 : VECTOR_ELT(d2, 0)); } } else PROTECT(r = allocVector(REALSXP, nx)); n = 0; for (j = 0; j < ny; j++) { if (m == 2) REAL(r)[n++] = f(REAL(x)+j, REAL(y)+j, nx, ny, nc); else for (i = (m == 0) ? j+1 : 0; i < nx; i++) REAL(r)[n++] = f(REAL(x)+i, REAL(y)+j, nx, ny, nc); R_CheckUserInterrupt(); } UNPROTECT(1); if (x != R_x) UNPROTECT(1); if (!isNull(R_y) && R_y != R_x && y != R_y) UNPROTECT(1); return r; } // R wrappers SEXP R_minkowski_dist(SEXP x, SEXP y, SEXP d, SEXP p) { if (isNull(p)) error("'p' invalid"); return dists(x, y, d, minkowski, p); } SEXP R_euclidean_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, euclidean, R_NilValue); } SEXP R_maximum_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, maximum, R_NilValue); } SEXP R_manhattan_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, manhattan, R_NilValue); } SEXP R_canberra_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, canberra, R_NilValue); } SEXP R_binary_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, binary, R_NilValue); } SEXP R_matching_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, matching, R_NilValue); } SEXP R_fuzzy_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, fuzzy, R_NilValue); } SEXP R_mutual_dist(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, mutual, R_NilValue); } // 2017/10 SEXP R_bjaccard(SEXP x, SEXP y, SEXP d) { SEXP r = dists(x, y, d, binary, R_NilValue); for (int k = 0; k < LENGTH(r); k++) { double z = REAL(r)[k]; if (!ISNAN(z)) REAL(r)[k] = 1 - z; } return r; } static double ebinary(double *x, double *y, int nx, int ny, int nc) { double dev, prod, dist, xy; int count; int j; dist = xy = 0; count = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { dev = (*x - *y); prod = *x * *y; if (!ISNAN(dev) && !ISNAN(prod)) { dist += dev * dev; xy += prod; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (!R_FINITE(xy)) return NA_REAL; dist = dist / dfp + xy; xy /= dist; if (ISNAN(xy)) if (dist < DBL_MIN) return 1; else return NA_REAL; return xy; } SEXP R_ess2(SEXP x, SEXP y, SEXP d) { dfp = .5; return dists(x, y, d, ebinary, R_NilValue); } SEXP R_ejaccard(SEXP x, SEXP y, SEXP d) { dfp = 1; return dists(x, y, d, ebinary, R_NilValue); } SEXP R_edice(SEXP x, SEXP y, SEXP d) { dfp = 2; return dists(x, y, d, ebinary, R_NilValue); } static double cosine(double *x, double *y, int nx, int ny, int nc) { double prod, xy, xx, yy; int count; int j; xy = xx = yy = 0; count = 0; for (j = 0; j < nc; j++) { if (both_non_NA(*x, *y)) { prod = *x * *y; if (!ISNAN(prod)) { xy += prod; xx += *x * *x; yy += *y * *y; count++; } } x += nx; y += ny; } if (count == 0) return NA_REAL; if (!R_FINITE(xy)) return NA_REAL; xy /= sqrt(xx) * sqrt(yy); if (ISNAN(xy)) if (xx < DBL_MIN && yy < DBL_MIN) return 1; else if (xx < DBL_MIN || yy < DBL_MIN) return 0; else return NA_REAL; return xy; } SEXP R_cosine(SEXP x, SEXP y, SEXP d) { return dists(x, y, d, cosine, R_NilValue); } // proxy/src/arrayIndex.c0000644000176200001440000001662112265347202014505 0ustar liggesusers #include #include // workaround i18n #define _(x) (x) // copied from 2.14-2 src/main/subscript.c // // ceeboo 2011/11 2014/1 // #define ECALL(call, yy) if(call == R_NilValue) error(yy); else errorcall(call, yy); static SEXP nullSubscript(int n) { int i; SEXP indx; indx = allocVector(INTSXP, n); for (i = 0; i < n; i++) INTEGER(indx)[i] = i + 1; return indx; } static SEXP logicalSubscript(SEXP s, int ns, int nx, int *stretch, SEXP call) { int canstretch, count, i, nmax; SEXP indx; canstretch = *stretch; if (!canstretch && ns > nx) { ECALL(call, _("(subscript) logical subscript too long")); } nmax = (ns > nx) ? ns : nx; *stretch = (ns > nx) ? ns : 0; if (ns == 0) return(allocVector(INTSXP, 0)); count = 0; for (i = 0; i < nmax; i++) if (LOGICAL(s)[i%ns]) count++; indx = allocVector(INTSXP, count); count = 0; for (i = 0; i < nmax; i++) if (LOGICAL(s)[i%ns]) { if (LOGICAL(s)[i%ns] == NA_LOGICAL) INTEGER(indx)[count++] = NA_INTEGER; else INTEGER(indx)[count++] = i + 1; } return indx; } static SEXP negativeSubscript(SEXP s, int ns, int nx, SEXP call) { SEXP indx; int stretch = 0; int i, ix; PROTECT(indx = allocVector(LGLSXP, nx)); for (i = 0; i < nx; i++) LOGICAL(indx)[i] = 1; for (i = 0; i < ns; i++) { ix = INTEGER(s)[i]; if (ix != 0 && ix != NA_INTEGER && -ix <= nx) LOGICAL(indx)[-ix - 1] = 0; } s = logicalSubscript(indx, nx, nx, &stretch, call); UNPROTECT(1); return s; } static SEXP positiveSubscript(SEXP s, int ns, int nx) { SEXP indx; int i, zct = 0; for (i = 0; i < ns; i++) { if (INTEGER(s)[i] == 0) zct++; } if (zct) { indx = allocVector(INTSXP, (ns - zct)); for (i = 0, zct = 0; i < ns; i++) if (INTEGER(s)[i] != 0) INTEGER(indx)[zct++] = INTEGER(s)[i]; return indx; } else return s; } static SEXP integerSubscript(SEXP s, int ns, int nx, int *stretch, SEXP call) { int i, ii, min, max, canstretch; Rboolean isna = FALSE; canstretch = *stretch; *stretch = 0; min = 0; max = 0; for (i = 0; i < ns; i++) { ii = INTEGER(s)[i]; if (ii != NA_INTEGER) { if (ii < min) min = ii; if (ii > max) max = ii; } else isna = TRUE; } if (max > nx) { if(canstretch) *stretch = max; else { ECALL(call, _("subscript out of bounds")); } } if (min < 0) { if (max == 0 && !isna) return negativeSubscript(s, ns, nx, call); else { ECALL(call, _("only 0's may be mixed with negative subscripts")); } } else return positiveSubscript(s, ns, nx); return R_NilValue; } /* This uses a couple of horrible hacks in conjunction with * VectorAssign (in subassign.c). If subscripting is used for * assignment, it is possible to extend a vector by supplying new * names, and we want to give the extended vector those names, so they * are returned as the use.names attribute. Also, unset elements of the vector * of new names (places where a match was found) are indicated by * setting the element of the newnames vector to NULL. */ /* The original code (pre 2.0.0) used a ns x nx loop that was too * slow. So now we hash. Hashing is expensive on memory (up to 32nx * bytes) so it is only worth doing if ns * nx is large. If nx is * large, then it will be too slow unless ns is very small. */ static SEXP stringSubscript(SEXP s, int ns, int nx, SEXP names, int *stretch, Rboolean in, SEXP call) { SEXP indx, indexnames; int i, j, nnames, sub, extra; int canstretch = *stretch; /* product may overflow, so check factors as well. */ Rboolean usehashing = in && ( ((ns > 1000 && nx) || (nx > 1000 && ns)) || (ns * nx > 15*nx + ns) ); PROTECT(s); PROTECT(names); PROTECT(indexnames = allocVector(VECSXP, ns)); nnames = nx; extra = nnames; /* Process each of the subscripts. First we compare with the names * on the vector and then (if there is no match) with each of the * previous subscripts, since (if assigning) we may have already * added an element of that name. (If we are not assigning, any * nonmatch will have given an error.) */ if(usehashing) { /* must be internal, so names contains a character vector */ /* NB: this does not behave in the same way with respect to "" and NA names: they will match */ PROTECT(indx = match(names, s, 0)); /* second pass to correct this */ for (i = 0; i < ns; i++) if(STRING_ELT(s, i) == NA_STRING || !CHAR(STRING_ELT(s, i))[0]) INTEGER(indx)[i] = 0; for (i = 0; i < ns; i++) SET_VECTOR_ELT(indexnames, i, R_NilValue); } else { PROTECT(indx = allocVector(INTSXP, ns)); for (i = 0; i < ns; i++) { sub = 0; if (names != R_NilValue) { for (j = 0; j < nnames; j++) { SEXP names_j = STRING_ELT(names, j); if (!in && TYPEOF(names_j) != CHARSXP) { ECALL(call, _("character vector element does not have type CHARSXP")); } if (NonNullStringMatch(STRING_ELT(s, i), names_j)) { sub = j + 1; SET_VECTOR_ELT(indexnames, i, R_NilValue); break; } } } INTEGER(indx)[i] = sub; } } for (i = 0; i < ns; i++) { sub = INTEGER(indx)[i]; if (sub == 0) { for (j = 0 ; j < i ; j++) if (NonNullStringMatch(STRING_ELT(s, i), STRING_ELT(s, j))) { sub = INTEGER(indx)[j]; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, j)); break; } } if (sub == 0) { if (!canstretch) { ECALL(call, _("subscript out of bounds")); } extra += 1; sub = extra; SET_VECTOR_ELT(indexnames, i, STRING_ELT(s, i)); } INTEGER(indx)[i] = sub; } /* We return the new names as the names attribute of the returned subscript vector. */ if (extra != nnames) setAttrib(indx, install("use.names"), indexnames); if (canstretch) *stretch = extra; UNPROTECT(4); return indx; } /* Array Subscripts. dim is the dimension (0 to k-1) s is the subscript list, dn is the attribute name of dim dnn is the attribute name of dimnames x is the array to be subscripted. */ SEXP _int_array_subscript(int dim, SEXP s, const char *dn, const char *dnn, SEXP x, Rboolean in, SEXP call) { int nd, ns, stretch = 0; SEXP dnames, tmp; ns = LENGTH(s); nd = INTEGER(getAttrib(x, install(dn)))[dim]; switch (TYPEOF(s)) { case NILSXP: return allocVector(INTSXP, 0); case LGLSXP: return logicalSubscript(s, ns, nd, &stretch, call); case INTSXP: return integerSubscript(s, ns, nd, &stretch, call); case REALSXP: PROTECT(tmp = coerceVector(s, INTSXP)); tmp = integerSubscript(tmp, ns, nd, &stretch, call); UNPROTECT(1); return tmp; case STRSXP: dnames = getAttrib(x, install(dnn)); if (dnames == R_NilValue) { ECALL(call, _("no 'dimnames' attribute for array")); } dnames = VECTOR_ELT(dnames, dim); return stringSubscript(s, ns, nd, dnames, &stretch, in, call); case SYMSXP: if (s == R_MissingArg) return nullSubscript(nd); default: if (call == R_NilValue) error(_("invalid subscript type '%s'"), type2char(TYPEOF(s))); else errorcall(call, _("invalid subscript type '%s'"), type2char(TYPEOF(s))); } return R_NilValue; } // R interface SEXP R_arraySubscript(SEXP x, SEXP dim, SEXP s, SEXP dn, SEXP dnn) { // FIXME return _int_array_subscript(INTEGER(dim)[0], s, (const char *) CHAR(STRING_ELT(dn, 0)), (const char *) CHAR(STRING_ELT(dnn, 0)), x, TRUE, R_NilValue); } // proxy/src/dll.c0000644000176200001440000000473613044065103013147 0ustar liggesusers #include #include #include extern SEXP R_minkowski_dist(SEXP x, SEXP y, SEXP d, SEXP p); extern SEXP R_euclidean_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_maximum_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_manhattan_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_canberra_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_binary_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_matching_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_fuzzy_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_mutual_dist(SEXP x, SEXP y, SEXP d); extern SEXP R_bjaccard(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_ejaccard(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_edice(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_cosine(SEXP R_x, SEXP R_y, SEXP R_d); extern SEXP R_subset_dist(SEXP R_x, SEXP s); extern SEXP R_rowSums_dist(SEXP R_x, SEXP na_rm); extern SEXP R_row_dist(SEXP x, SEXP col); extern SEXP R_apply_dist_matrix(SEXP p); extern SEXP R_apply_dist_list(SEXP p); extern SEXP R_apply_dist_binary_matrix(SEXP p); extern SEXP R_apply_dist_data_frame(SEXP p); static const R_CallMethodDef CallEntries[] = { {"R_minkowski_dist", (DL_FUNC) R_minkowski_dist, 4}, {"R_euclidean_dist", (DL_FUNC) R_euclidean_dist, 3}, {"R_maximum_dist", (DL_FUNC) R_maximum_dist, 3}, {"R_manhattan_dist", (DL_FUNC) R_manhattan_dist, 3}, {"R_canberra_dist", (DL_FUNC) R_canberra_dist, 3}, {"R_binary_dist", (DL_FUNC) R_binary_dist, 3}, // {"R_matching_dist", (DL_FUNC) R_matching_dist, 3}, {"R_fuzzy_dist", (DL_FUNC) R_fuzzy_dist, 3}, // {"R_mutual_dist", (DL_FUNC) R_mutual_dist, 3}, {"R_bjaccard", (DL_FUNC) R_bjaccard, 3}, {"R_ejaccard", (DL_FUNC) R_ejaccard, 3}, {"R_edice", (DL_FUNC) R_edice, 3}, {"R_cosine", (DL_FUNC) R_cosine, 3}, {"R_subset_dist", (DL_FUNC) R_subset_dist, 2}, {"R_rowSums_dist", (DL_FUNC) R_rowSums_dist, 2}, {"R_row_dist", (DL_FUNC) R_row_dist, 2}, {NULL, NULL, 0} }; static const R_ExternalMethodDef ExternalEntries[] = { {"R_apply_dist_matrix", (DL_FUNC) R_apply_dist_matrix, -1}, {"R_apply_dist_list", (DL_FUNC) R_apply_dist_list, -1}, {"R_apply_dist_binary_matrix", (DL_FUNC) R_apply_dist_binary_matrix, -1}, {"R_apply_dist_data_frame", (DL_FUNC) R_apply_dist_data_frame, -1}, {NULL, NULL, 0} }; void R_init_proxy(DllInfo *dll) { R_registerRoutines(dll, NULL, CallEntries, NULL, ExternalEntries); R_useDynamicSymbols(dll, FALSE); } proxy/vignettes/0000755000176200001440000000000014057153366013455 5ustar liggesusersproxy/vignettes/overview.Rnw0000755000176200001440000000145711714440422016012 0ustar liggesusers\documentclass{article} \title{Proximity measures in the \texttt{proxy} package for {\sf R}} \author{David Meyer} %\VignetteIndexEntry{Proximity measures in the proxy package for R} %\VignetteDepends{proxy} %\VignetteKeywords{similarities,dissimilarities,distance} %\VignettePackage{proxy} <>= library(proxy) x <- summary(pr_DB, "long") FUN <- function(index) { for (i in which(index)) { writeLines(sprintf("Aliases: %s", paste(x$names[[i]], collapse = ", "))) writeLines(sprintf("Type : %s", x$type[i])) writeLines(sprintf("Formula: %s\n", x$formula[i])) } } @ \begin{document} \maketitle \section{Similarities} <>= FUN(x$distance == FALSE) @ \section{Dissimilarities} <>= FUN(x$distance == TRUE) @ \end{document} proxy/R/0000755000176200001440000000000014057153366011646 5ustar liggesusersproxy/R/util.R0000655000176200001440000000254213037347321012744 0ustar liggesusers ## wrappers for class dist ## ## note that all type checking and coercing ## is now done in C, as well as handling of ## attributes. ## ## fixme: create generic functions? ## ## ceeboo 2007 dim.dist <- function(x) rep.int(attr(x, "Size"), 2) # works with nrow and ncol dimnames.dist <- names.dist <- function(x) attr(x, "Labels") "dimnames<-.dist" <- "names<-.dist" <- function(x, value) { if (is.null(value)) attr(x, "Labels") <- NULL else { if (length(value) != attr(x, "Size")) stop("dimension of 'x' and length of 'value' do not conform") attr(x, "Labels") <- as.character(value) } x } row.dist <- function(x) .Call(R_row_dist, x, FALSE) col.dist <- function(x) .Call(R_row_dist, x, TRUE) ## subset.dist <- "[[.dist" <- function(x, subset, ...) { if (missing(subset)) return(x) .Call(R_subset_dist, x, unique(subset)) } ## rowSums.dist <- colSums.dist <- function(x, na.rm = FALSE) .Call(R_rowSums_dist, x, na.rm) ## rowMeans.dist <- colMeans.dist <- function(x, na.rm = FALSE, diag = TRUE) { if (!is.logical(diag)) stop("'diag' not of type logical") s <- rowSums.dist(x, na.rm) if (na.rm) { x[!(is.na(x) | is.nan(x))] <- 1 s / (rowSums.dist(x, na.rm) + (diag == TRUE)) } else s / (length(s) - (diag == FALSE)) } ### proxy/R/similarities.R0000644000176200001440000006275714057151030014472 0ustar liggesusers### Binary measures pr_Jaccard <- function(a, b, c, d, n) a / (n - d) pr_Jaccard_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$abcd <- TRUE reg_entry$FUN <- "pr_Jaccard" } else { storage.mode(x) <- "logical" if (!is.null(y)) storage.mode(y) <- "logical" } list(x = x, y = y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_bjaccard", names = c("Jaccard","binary","Reyssac","Roux"), distance = FALSE, PREFUN = "pr_Jaccard_prefun", convert = "pr_simil2dist", type = "binary", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "a / (a + b + c)", reference = "Jaccard, P. (1908). Nouvelles recherches sur la distribution florale. Bull. Soc. Vaud. Sci. Nat., 44, pp. 223--270.", description = "The Jaccard Similarity (C implementation) for binary data. It is the proportion of (TRUE, TRUE) pairs, but not considering (FALSE, FALSE) pairs. So it compares the intersection with the union of object sets.") pr_Kulczynski1 <- function(a, b, c, d, n) a / (b + c) pr_DB$set_entry(FUN = "pr_Kulczynski1", names = "Kulczynski1", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / (b + c)", reference = "Kurzcynski, T.W. (1970). Generalized distance and discrete variables. Biometrics, 26, pp. 525--534.", description = "Kulczynski Similarity for binary data. Relates the (TRUE, TRUE) pairs to discordant pairs.") pr_Kulczynski2 <- function(a, b, c, d, n) (a / (a + b) + a / (a + c)) / 2 pr_DB$set_entry(FUN = "pr_Kulczynski2", names = "Kulczynski2", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "[a / (a + b) + a / (a + c)] / 2", reference = "Kurzcynski, T.W. (1970). Generalized distance and discrete variables. Biometrics, 26, pp. 525--534.", description = "Kulczynski Similarity for binary data. Relates the (TRUE, TRUE) pairs to the discordant pairs.") pr_Mountford <- function(a, b, c, d, n) 2 * a / (a * (b + c) + 2 * b * c) pr_DB$set_entry(FUN = "pr_Mountford", names = "Mountford", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "2a / (ab + ac + 2bc)", reference = "Mountford, M.D. (1962). An index of similarity and its application to classificatory probems. In P.W. Murphy (ed.), Progress in Soil Zoology, pp. 43--50. Butterworth, London.", description = "The Mountford Similarity for binary data.") pr_fagerMcgowan <- function(a, b, c, d, n) a / sqrt((a + b) * (a + c)) - sqrt(a + c) / 2 pr_DB$set_entry(FUN = "pr_fagerMcgowan", names = c("Fager", "McGowan"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / sqrt((a + b)(a + c)) - sqrt(a + c) / 2", reference = "Fager, E. W. and McGowan, J. A. (1963). Zooplankton species groups in the North Pacific. Science, N. Y. 140: 453-460", description = "The Fager / McGowan distance.") pr_RusselRao <- function(a, b, c, d, n) a / n pr_DB$set_entry(FUN = "pr_RusselRao", names = c("Russel","Rao"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / n", reference = "Russell, P.F., and Rao T.R. (1940). On habitat and association of species of anopheline larvae in southeastern, Madras, J. Malaria Inst. India 3, pp. 153--178", description = "The Russel/Rao Similarity for binary data. It is just the proportion of (TRUE, TRUE) pairs.") pr_SimpleMatching <- function(a, b, c, d, n) (a + d) / n pr_DB$set_entry(FUN = "pr_SimpleMatching", names = c("simple matching", "Sokal/Michener"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(a + d) / n", reference = "Sokal, R.R., and Michener, C.D. (1958). A statistical method for evaluating systematic relationships. Univ. Kansas Sci. Bull., 39, pp. 1409--1438.", description = "The Simple Matching Similarity or binary data. It is the proportion of concordant pairs.") pr_Hamman <- function(a, b, c, d, n) (a + d - b - c) / n pr_DB$set_entry(FUN = "pr_Hamman", names = "Hamman", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "([a + d] - [b + c]) / n", reference = "Hamann, U. (1961). Merkmalbestand und Verwandtschaftsbeziehungen der Farinosae. Ein Beitrag zum System der Monokotyledonen. Willdenowia, 2, pp. 639-768.", description = "The Hamman Matching Similarity for binary data. It is the proportion difference of the concordant and discordant pairs.") pr_Faith <- function(a, b, c, d, n) (a + d / 2) / n pr_DB$set_entry(FUN = "pr_Faith", names = "Faith", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(a + d/2) / n", reference = "Belbin, L., Marshall, C. & Faith, D.P. (1983). Representing relationships by automatic assignment of colour. The Australian Computing Journal 15, 160-163.", description = "The Faith similarity") pr_RogersTanimoto <- function(a, b, c, d, n) (a + d) / (a + 2 * (b + c) + d) pr_DB$set_entry(FUN = "pr_RogersTanimoto", names = c("Tanimoto", "Rogers"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(a + d) / (a + 2b + 2c + d)", reference = "Rogers, D.J, and Tanimoto, T.T. (1960). A computer program for classifying plants. Science, 132, pp. 1115--1118.", description = "The Rogers/Tanimoto Similarity for binary data. Similar to the simple matching coefficient, but putting double weight on the discordant pairs.") pr_Dice <- function(a, b, c, d, n) 2 * a / (2 * a + b + c) pr_DB$set_entry(FUN = "pr_Dice", names = c("Dice", "Czekanowski", "Sorensen"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "2a / (2a + b + c)", reference = "Dice, L.R. (1945). Measures of the amount of ecologic association between species. Ecolology, 26, pp. 297--302.", description = "The Dice Similarity") pr_Phi <- function(a, b, c, d, n) (a * d - b * c) / (sqrt(a + b) * sqrt(c + d) * sqrt(a + c) * sqrt(b + d)) pr_DB$set_entry(FUN = "pr_Phi", names = "Phi", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(ad - bc) / sqrt[(a + b)(c + d)(a + c)(b + d)]", reference = "Sokal, R.R, and Sneath, P.H.A. (1963). Principles of numerical taxonomy. W.H. Freeman and Company, San Francisco.", description = "The Phi Similarity (= Product-Moment-Correlation for binary variables)") pr_Stiles <- function(a, b, c, d, n) log(n) + 2 * log(abs(a * d - b * c) - 0.5 * n) - log(a + b) - log(c + d) - log(a + c) - log(b + d) pr_DB$set_entry(FUN = "pr_Stiles", names = "Stiles", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "log(n(|ad-bc| - 0.5n)^2 / [(a + b)(c + d)(a + c)(b + d)])", reference = "Stiles, H.E. (1961). The association factor in information retrieval. Communictions of the ACM, 8, 1, pp. 271--279.", description = "The Stiles Similarity (used for information retrieval). Identical to the logarithm of Krylov's distance.") pr_Michael <- function(a, b, c, d, n) 4 * (a * d - b * c) / ((a + d) * (a + d) + (b + c) * (b + c)) pr_DB$set_entry(FUN = "pr_Michael", names = "Michael", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "4(ad - bc) / [(a + d)^2 + (b + c)^2]", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Michael Similarity") pr_MozleyMargalef <- function(a, b, c, d, n) a * n / ((a + b) * (a + c)) pr_DB$set_entry(FUN = "pr_MozleyMargalef", names = c("Mozley","Margalef"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "an / (a + b)(a + c)", reference = "Margalef, D.R. (1958). Information theory in ecology. Gen. Systems, 3, pp. 36--71.", description = "The Mozley/Margalef Similarity") pr_Yule<- function(a, b, c, d, n) { ad <- a * d bc <- b * c (ad - bc) / (ad + bc) } pr_DB$set_entry(FUN = "pr_Yule", names = "Yule", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(ad - bc) / (ad + bc)", reference = "Yule, G.U. (1912). On measuring associations between attributes. J. Roy. Stat. Soc., 75, pp. 579--642.", description = "Yule Similarity") pr_Yule2<- function(a, b, c, d, n) { ad <- a * d bc <- b * c (sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc)) } pr_DB$set_entry(FUN = "pr_Yule2", names = "Yule2", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "(sqrt(ad) - sqrt(bc)) / (sqrt(ad) + sqrt(bc))", reference = "Yule, G.U. (1912). On measuring associations between attributes. J. Roy. Stat. Soc., 75, pp. 579--642.", description = "Yule Similarity") pr_Ochiai <- function(a, b, c, d, n) a / sqrt((a + b) * (a + c)) pr_DB$set_entry(FUN = "pr_Ochiai", names = "Ochiai", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / sqrt[(a + b)(a + c)]", reference = "Sokal, R.R, and Sneath, P.H.A. (1963). Principles of numerical taxonomy. W.H. Freeman and Company, San Francisco.", description = "The Ochiai Similarity") pr_Simpson <- function(a, b, c, d, n) a / min((a + b), (a + c)) pr_DB$set_entry(FUN = "pr_Simpson", names = "Simpson", distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / min{(a + b), (a + c)}", reference = "Simpson, G.G. (1960). Notes on the measurement of faunal resemblance. American Journal of Science 258-A: 300-311.", description = "The Simpson Similarity (used in Zoology).") pr_BraunBlanquet <- function(a, b, c, d, n) a / max((a + b), (a + c)) pr_DB$set_entry(FUN = "pr_BraunBlanquet", names = c("Braun-Blanquet"), distance = FALSE, convert = "pr_simil2dist", type = "binary", loop = TRUE, C_FUN = FALSE, abcd = TRUE, formula = "a / max{(a + b), (a + c)}", reference = "Braun-Blanquet, J. (1964): Pflanzensoziologie. Springer Verlag, Wien and New York.", description = "The Braun-Blanquet Similarity (used in Biology).") pr_cos <- function(x, y) abs(crossprod(x, y) / sqrt(crossprod(x) * crossprod(y))) pr_cos_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_cos" } list(x = x, y = y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_cosine", names = c("cosine", "angular"), PREFUN = "pr_cos_prefun", distance = FALSE, convert = function (x) 1 - x, type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "xy / sqrt(xx * yy)", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "The cos Similarity (C implementation)") pr_eJaccard <- function(x, y) { tmp <- crossprod(x, y) tmp / (crossprod(x) + crossprod(y) - tmp) } pr_eJaccard_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_eJaccard" } list(x = 0 + x, y = if (!is.null(y)) 0 + y else NULL, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_ejaccard", names = c("eJaccard", "extended_Jaccard"), PREFUN = "pr_eJaccard_prefun", distance = FALSE, convert = "pr_simil2dist", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "xy / (xx + yy - xy)", reference = "Strehl A. and Ghosh J. (2000). Value-based customer grouping from large retail data-sets. In Proc. SPIE Conference on Data Mining and Knowledge Discovery, Orlando, volume 4057, pages 33-42. SPIE.", description = "The extended Jaccard Similarity (C implementation; yields Jaccard for binary x,y).") pr_eDice <- function(x, y) { tmp <- crossprod(x, y) tmp / (crossprod(x) + crossprod(y)) } pr_eDice_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_eDice" } list(x = 0 + x, y = if (!is.null(y)) 0 + y else NULL, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_edice", names = c("eDice", "extended_Dice","eSorensen"), PREFUN = "pr_eDice_prefun", distance = FALSE, convert = "pr_simil2dist", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "2xy / (xx + yy)", ## FIXME reference = "Alexander Strehl. Relationship-based Clustering and Cluster Ensembles for High-dimensional Data Mining. PhD thesis, The University of Texas at Austin, May 2002.", description = "The extended Dice Similarity (C implementation; yields Dice for binary x,y).") pr_cor <- function(x, y) { X <- x - mean(x) Y <- y - mean(y) crossprod(X, Y) / sqrt(crossprod(X) * crossprod(Y)) } pr_DB$set_entry(FUN = "pr_cor", names = "correlation", distance = FALSE, convert = "pr_simil2dist", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "xy / sqrt(xx * yy) for centered x,y", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "correlation (taking n instead of n-1 for the variance)") pr_ChiSquared <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sum((tab - exp) ^ 2 / exp) } pr_DB$set_entry(FUN = "pr_ChiSquared", names = "Chi-squared", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_ij (o_i - e_i)^2 / e_i", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "Sum of standardized squared deviations from observed and expected values in a cross-tab for x and y.") pr_PhiSquared <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sum((tab - exp) ^ 2 / exp) / sum(tab) } pr_DB$set_entry(FUN = "pr_PhiSquared", names = "Phi-squared", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "[sum_ij (o_i - e_i)^2 / e_i] / n", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "Standardized Chi-Squared (= Chi / n).") pr_Tschuprow <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sqrt(sum((tab - exp) ^ 2 / exp) / sum(tab) / sqrt((nrow(tab) - 1) * (ncol(tab) - 1))) } pr_DB$set_entry(FUN = "pr_Tschuprow", names = "Tschuprow", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt{[sum_ij (o_i - e_i)^2 / e_i] / n / sqrt((p - 1)(q - 1))}", reference = "Tschuprow, A.A. (1925). Grundbegriffe und Grundprobleme der Korrelationstheorie. Springer.", description = "Tschuprow-standardization of Chi-Squared.") pr_Cramer <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) sqrt(sum((tab - exp) ^ 2 / exp) / sum(tab) / min((nrow(tab) - 1), (ncol(tab) - 1))) } pr_DB$set_entry(FUN = "pr_Cramer", names = "Cramer", distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt{[Chi / n)] / min[(p - 1), (q - 1)]}", reference = "Cramer, H. (1946). The elements of probability theory and some of its applications. Wiley, New York. ", description = "Cramer-standization of Chi-Squared.") pr_Pearson <- function(x, y) { tab <- table(x,y) exp <- rowSums(tab) %o% colSums(tab) / sum(tab) Chi <- sum((tab - exp) ^ 2 / exp) sqrt(Chi / (sum(tab) + Chi)) } pr_DB$set_entry(FUN = "pr_Pearson", names = c("Pearson","contingency"), distance = FALSE, convert = "pr_simil2dist", type = "nominal", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt{Chi / (n + Chi)}", reference = "Anderberg, M.R. (1973). Cluster Analysis for Applicaitons. Academic Press.", description = "Contingency Coefficient. Chi is the Chi-Squared statistic.") pr_Gower <- function(x, y, l = NA, f = NA, m = NA, weights = NA) { ## prepare vectors len <- length(x) d <- logical(len) s <- double(len) ## compute scores groupwise: ## logical if (any(l)) { s[l] <- x[l] & y[l] d[l] <- x[l] | y[l] } ## factor if (any(f)) { s[f] <- x[f] == y[f] d[f] <- TRUE } ## metric if (any(m)) { s[m] <- 1 - abs(as.double(x[m]) - as.double(y[m])) d[m] <- TRUE } ## do not count missings d[is.na(s)] <- FALSE s[is.na(s)] <- 0 drop(crossprod(s, weights)) / drop(crossprod(d, weights)) } pr_Gower_prefun <- function(x, y, pairwise, p, reg_entry) { ## transform x and y x <- as.data.frame(x) if (!is.null(y)) y <- as.data.frame(y) ## determine types l <- sapply(x, is.logical) f <- sapply(x, is.factor) o <- sapply(x, is.ordered) f <- f & !o m <- !(l | f | o) ## Transform ordinal variables if (any(o)) { ##FIXME: Gower uses ranks, but daisy just uses internal codes?? x[o] <- lapply(x[o], as.integer) x[o] <- lapply(x[o], function(i) (i - 1) / (max(i) - 1)) if (!is.null(y)) { y[o] <- lapply(y[o], as.integer) y[o] <- lapply(y[o], function(i) (i - 1) / (max(i) - 1)) } m <- m | o } ## scale metric types RANGE <- function(x, y = NULL) { ## compute scale MAX <- sapply(x, max, na.rm = TRUE) MIN <- sapply(x, min, na.rm = TRUE) if (!is.null(y)) { MAX <- pmax(MAX, sapply(y, max, na.rm = TRUE)) MIN <- pmin(MIN, sapply(y, min, na.rm = TRUE)) } ret <- MAX - MIN ## do not scale when range == 0 ret[ret == 0] <- 1 ret } if (any(m)) { if (!is.null(p$ranges) && is.null(p$ranges.x)) p$ranges.x <- p$ranges r <- if(is.null(p$ranges.x)) RANGE(x[m], y[m]) else rep_len(p$ranges.x, length.out = sum(m)) if (!is.null(p$min) && is.null(p$min.x)) p$min.x <- p$min MIN <- if (is.null(p$min.x)) { if (is.null(y)) sapply(x[m], min, na.rm = TRUE) else pmin(sapply(x[m], min, na.rm = TRUE), sapply(y[m], min, na.rm = TRUE)) } else rep_len(p$min.x, length.out = sum(m)) x[m] <- sweep(sweep(x[m], 2, MIN), 2, r, FUN = "/") if (!is.null(y)) { if (!is.null(p$min) && is.null(p$min.y)) p$min.y <- p$min if (!is.null(p$min.y)) MIN <- rep_len(p$min.y, length.out = sum(m)) if (!is.null(p$ranges) && is.null(p$ranges.y)) p$ranges.y <- p$ranges if (!is.null(p$ranges.y)) r <- rep_len(p$ranges.y, length.out = sum(m)) y[m] <- sweep(sweep(y[m], 2, MIN), 2, r, FUN = "/") } } ## weights weights <- rep_len(if (is.null(p$weights)) 1 else p$weights, length.out = ncol(x)) p <- list(l = l, f = f, m = m, weights = weights) list(x = x, y = y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "pr_Gower", names = "Gower", PREFUN = "pr_Gower_prefun", distance = FALSE, convert = "pr_simil2dist", type = NA, loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "Sum_k (s_ijk * w_k) / Sum_k (d_ijk * w_k)", reference = "Gower, J.C. (1971). A general coefficient of similarity and some of its properties. Biometrics, 27, pp. 857--871.", description = "The Gower Similarity for mixed variable types. w_k are variable weights. d_ijk is 0 for missings or a pair of FALSE logicals, and 1 else. s_ijk is 1 for a pair of TRUE logicals or matching factor levels, and the absolute difference for metric variables. Each metric variable is scaled with its corresponding range, provided the latter is not 0. Ordinal variables are converted to ranks r_i and the scores z_i = (r_i - 1) / (max r_i - 1) are taken as metric variables. Note that in the latter case, unlike the definition of Gower, just the internal integer codes are taken as the ranks, and not what rank() would return. This is for compatibility with daisy() of the cluster package, and will make a slight difference in case of ties. The weights w_k can be specified by passing a numeric vector (recycled as needed) to the 'weights' argument. Ranges (minimum) for scaling the columns of x and y can be specified using the 'ranges.x'/'ranges.y' ('min.x' / 'min.y') arguments (or simply 'ranges' ('min') for both x and y). In case of cross-proximities, if not specified via these arguments, both data frames are standardized together.") proxy/R/dissimilarities.R0000644000176200001440000004071113042403627015161 0ustar liggesuserspr_Euclidean <- function(x, y) sqrt(crossprod(x - y)) pr_Euclidean_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Euclidean" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_euclidean_dist", names = c("Euclidean","L2"), PREFUN = "pr_Euclidean_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sqrt(sum_i (x_i - y_i)^2))", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Euclidean Distance (C implementation with compensation for excluded components)") pr_Mahalanobis <- function(x, y, cov) sqrt(mahalanobis(x, y, cov)) pr_Mahalanobis_prefun <- function(x, y, pairwise, p, reg_entry) { if (length(p) < 1) p <- list(cov(x, y)) list(x = x, y = y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "pr_Mahalanobis", names = "Mahalanobis", PREFUN = "pr_Mahalanobis_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt((x - y) Sigma^(-1) (x - y))", reference = "Mahalanobis P.C. (1936), On the generalised distance in statistics, Proceedings of the National Institute of Science of India 12, pp. 49-55", description = "The Mahalanobis Distance. The Variance-Covariance-Matrix is estimated from the input data if unspecified.") pr_Bhjattacharyya <- function(x, y) sqrt(crossprod(sqrt(x) - sqrt(y))) pr_DB$set_entry(FUN = "pr_Bhjattacharyya", names = "Bhjattacharyya", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt(sum_i (sqrt(x_i) - sqrt(y_i))^2))", reference = "Bhattacharyya A. (1943). On a measure of divergence between two statistical populations defined by probability distributions, Bull. Calcutta Math. Soc., vol. 35, pp. 99--109", description = "The Bhjattacharyya Distance") pr_Manhattan <- function(x, y) sum(abs(x - y)) pr_Manhattan_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Manhattan" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_manhattan_dist", names = c("Manhattan", "City-Block", "L1", "taxi"), PREFUN = "pr_Manhattan_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sum_i |x_i - y_i|", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Manhattan/City-Block/Taxi/L1-Distance (C implementation with compensation for excluded components)") pr_supremum <- function(x, y) max(abs(x - y)) pr_supremum_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_supremum" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_maximum_dist", names = c("supremum", "max", "maximum", "Tschebyscheff", "Chebyshev"), distance = TRUE, PREFUN = "pr_supremum_prefun", convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "max_i |x_i - y_i|", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Maximum/Supremum/Chebyshev Distance (C implementation)") pr_Minkowski <- function(x, y, p = 2) (sum(abs(x - y) ^ p)) ^ (1/p) pr_Minkowski_prefun <- function(x, y, pairwise, p, reg_entry) { if (length(p$p) < 1) stop("Argument 'p' mandatory!") p <- p$p[[1L]] if (p <= 0) stop("'p' must not be smaller than 0.") if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Minkowski" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_minkowski_dist", names = c("Minkowski","Lp"), PREFUN = "pr_Minkowski_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "(sum_i (x_i - y_i)^p)^(1/p)", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Minkowski Distance (C implementation with compensation for excluded components)") pr_Canberra <- function(x, y) {tmp <- abs(x - y) / abs(x + y); sum(tmp[!is.nan(tmp)])} pr_Canberra_prefun <- function(x, y, pairwise, p, reg_entry) { if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_Canberra" } list(x = if (!is.list(x)) 0 + x else x, y = if (!is.null(y)) if (!is.list(y)) 0 + y else y, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_canberra_dist", names = "Canberra", PREFUN = "pr_Canberra_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sum_i |x_i - y_i| / |x_i + y_i|", reference = "Cox, T.F., and Cox, M.A.A. (2001. Multidimensional Scaling. Chapmann and Hall.", description = "The Canberra Distance (C implementation with compensation for excluded components)") pr_WaveHedges <- function(x, y) sum(1 - min(x, y) / max(x, y)) pr_DB$set_entry(FUN = "pr_WaveHedges", names = c("Wave", "Hedges"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i (1 - min(x_i, y_i) / max(x_i, y_i))", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Wave/Hedges Distance") pr_Divergence <- function(x, y) {tmp <- (x - y)^2 / (x + y)^2; sum(tmp[!is.nan(tmp)])} pr_DB$set_entry(FUN = "pr_Divergence", names = "divergence", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i (x_i - y_i)^2 / (x_i + y_i)^2", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Divergence Distance") pr_KullbackLeibler <- function(x,y) { p <- x / sum(x); q <- y / sum(y); sum(p * log(p / q)) } pr_DB$set_entry(FUN = "pr_KullbackLeibler", names = c("Kullback", "Leibler"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i [x_i * log((x_i / sum_j x_j) / (y_i / sum_j y_j)) / sum_j x_j)]", reference = "Kullback S., and Leibler, R.A. (1951). On information and sufficiency. The Annals of Mathematical Statistics, vol. 22, pp. 79--86", description = "The Kullback-Leibler-distance.") pr_BrayCurtis <- function(x, y) sum(abs(x - y)) / sum(x + y) pr_DB$set_entry(FUN = "pr_BrayCurtis", names = c("Bray","Curtis"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i |x_i - y_i| / sum_i (x_i + y_i)", reference = "Bray J.R., Curtis J.T. (1957). An ordination of the upland forest of the southern Winsconsin. Ecological Monographies, 27, pp. 325--349", description = "The Bray/Curtis dissimilarity. Note that it is not a distance since it vioalates the triangle inequality.") pr_Soergel <- function(x, y) sum(abs(x - y)) / sum(max(x, y)) pr_DB$set_entry(FUN = "pr_Soergel", names = "Soergel", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i |x_i - y_i| / sum_i max{x_i, y_i}", reference = "Cox, T.F., and Cox, M.A.A. (2001). Multidimensional Scaling. Chapmann and Hall.", description = "The Soergel Distance") pr_Levenshtein_prefun <- function(x, y, pairwise, p, reg_entry) { if (system.file(package="cba") == "") stop("Need package 'cba'!") else loadNamespace("cba") if (pairwise) stop("Pairwise distances not implemented by sdist()!") list(x = x, y = y, pairwise = pairwise, p = p, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "sdists", names = "Levenshtein", PREFUN = "pr_Levenshtein_prefun", convert = "pr_dist2simil", distance = TRUE, loop = FALSE, abcd = FALSE, C_FUN = FALSE, PACKAGE = "cba", formula = "Number of insertions, edits, and deletions between to strings", reference = "Levenshtein V.I. (1966). Binary codes capable of correcting deletions, insertions, and reversals. Soviet Physics Doklady 10, pp. 707--710", description = "Wrapper for sdists() in the cba-package (C implementation).") pr_Podani <- function(x, y) { a <- b <- c <- d <- 0 n <- length(x) for (i in seq_len(n - 1)) for(j in (i+1):n) { a <- a + (x[i] < x[j] && y[i] < y[j] || x[i] > x[j] && y[i] > y[j]) b <- b + (x[i] < x[j] && y[i] > y[j] || x[i] > x[j] && y[i] < y[j]) c <- c + (x[i] == x[j] && y[i] == y[j] && (x[i] == 0 && y[i] == 0 || x[i] > 0 && y[i] > 0)) z <- sum(x[i] == 0, x[j] == 0, y[i] == 0, y[j] == 0) d <- d + ((x[i] == x[j] || y[i] == y[j]) && z > 0 && z < 4) } 1 - 2 * (a - b + c - d) / (n * (n - 1)) } pr_DB$set_entry(FUN = "pr_Podani", names = c("Podani","discordance"), distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "1 - 2 * (a - b + c - d) / (n * (n - 1))", reference = "Podani, J. (1997). A measure of discordance for partially ranked data when presence/absence is also meaningful. Coenoses 12: 127--130.", description = "The Podany measure of discordance is defined on ranks with ties. In the formula, for two given objects x and y, n is the number of variables, a is is the number of pairs of variables ordered identically, b the number of pairs reversely ordered, c the number of pairs tied in both x and y (corresponding to either joint presence or absence), and d the number of all pairs of variables tied at least for one of the objects compared such that one, two, or thee scores are zero.") pr_chord <- function(x, y) sqrt(2 * (1 - crossprod(x, y) / sqrt(crossprod(x) * crossprod(y)))) pr_DB$set_entry(FUN = pr_chord, names = "Chord", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt(2 * (1 - xy / sqrt(xx * yy)))", reference = "Orloci, L. 1967. An agglomerative method for classification of plant communities. J. Ecol 55:193--206.", description = "The Chord distance.") pr_geodesic <- function(x, y) acos(crossprod(x, y) / sqrt(crossprod(x) * crossprod(y))) pr_DB$set_entry(FUN = pr_geodesic, names = "Geodesic", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "arccos(xy / sqrt(xx * yy))", reference = "Orloci, L. 1967. Data centering: a review and evaluation with reference to component analysis. Syst. Zool. 16:208--212.", description = "The geoedesic distance, i.e. the angle between x and y.") pr_whittaker <- function(x, y) sum(abs(x / sum(x) - y / sum(y))) / 2 pr_DB$set_entry(FUN = pr_whittaker, names = "Whittaker", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sum_i |x_i / sum_i x - y_i / sum_i y| / 2", reference = "Whittaker, R.H. (1952) A study of summer foliage insect communities in the Great Smoky Mountains. Ecological Monographs 22, pp. 1--44.", description = "The Whittaker distance.") pr_hellinger <- function(x, y) sqrt(crossprod(sqrt(x / sum(x)) - sqrt(y / sum(y)))) pr_DB$set_entry(FUN = pr_hellinger, names = "Hellinger", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = TRUE, C_FUN = FALSE, abcd = FALSE, formula = "sqrt(sum_i (sqrt(x_i / sum_i x) - sqrt(y_i / sum_i y)) ^ 2)", reference = "Rao, C.R. (1995) Use of Hellinger distance in graphical displays. In E.-M. Tiit, T. Kollo, & H. Niemi (Ed.): Multivariate statistics and matrices in statistics. Leiden (Netherland): Brill Academic Publisher. pp. 143--161.", description = "The Hellinger distance.") pr_fJaccard <- function(x, y) sum(pmin(x, y)) / sum(pmax(x, y)) pr_fJaccard_prefun <- function(x, y, pairwise, p, reg_entry) { if (any(x < 0 | x > 1)) stop("Valid range for fuzzy measure: 0 <= x <= 1") if (!is.null(y) && any(y < 0 | y > 1)) stop("Valid range for fuzzy measure: 0 <= y <= 1") if (!is.matrix(x)) { reg_entry$C_FUN <- FALSE reg_entry$loop <- TRUE reg_entry$FUN <- "pr_fJaccard" } list(x = 0 + x, y = if (!is.null(y)) 0 + y else NULL, pairwise = pairwise, p = NULL, reg_entry = reg_entry) } pr_DB$set_entry(FUN = "R_fuzzy_dist", names = c("fJaccard", "fuzzy_Jaccard"), PREFUN = "pr_fJaccard_prefun", distance = TRUE, convert = "pr_dist2simil", type = "metric", loop = FALSE, C_FUN = TRUE, abcd = FALSE, formula = "sum_i (min{x_i, y_i} / max{x_i, y_i})", reference = "Miyamoto S. (1990). Fuzzy sets in information retrieval and cluster analysis, Kluwer Academic Publishers, Dordrecht.", description = "The fuzzy Jaccard dissimilarity (C implementation).") proxy/R/registry.R0000655000176200001440000003344013437463265013652 0ustar liggesusers################################### ### generic registry infrastructure ### IDEA: use lexical scope with nested functions to create an ### S3-"object" that exposes the data structure only through accessor functions. .FUNCall <- function(f) function(...) f(...) registry <- function(index_field = "names", entry_class = NULL, validity_FUN = NULL, registry_class = NULL, ignore_case = TRUE) { ### ATTRIBUTES ## repository DATA <- META <- list() ## permissions PERMISSIONS <- c(set_entries = TRUE, modify_entries = TRUE, delete_entries = TRUE, set_fields = TRUE) SEALED_FIELDS <- SEALED_ENTRIES <- character(0) ### METHODS (PRIVATE) ## helper functions .field_exists <- function(name) name %in% .get_field_names() .make_field <- function(default = NA, type = NA, is_mandatory = FALSE, is_modifiable = TRUE, validity_FUN = NULL) structure(list(type = type, default = default, is_mandatory = is_mandatory, is_modifiable = is_modifiable, validity_FUN = validity_FUN), class = "registry_field") .make_entry <- function(l) { ## sort l <- l[c(index_field, setdiff(.get_field_names(), index_field))] ## return object (possibly inheriting from entry_class) structure(l, class = c(entry_class, "registry_entry")) } .get_mandatory_fields <- function() names(META)[sapply(META, function(i) i$is_mandatory)] .get_field_defaults <- function() lapply(META, function(i) i$default) .get_entry_index <- function(name, stop_if_missing = TRUE) { ## returns the index of the first exact match (modulo case): index <- if (ignore_case) sapply(DATA, function(i) toupper(name) %in% toupper(i[[index_field]])) else sapply(DATA, function(i) name %in% i[[index_field]]) if (!any(index)) { if (stop_if_missing) stop(paste("Entry", dQuote(name), "not in registry.")) else return(NULL) } which(index)[1] } .check_value <- function(field_name, field, value) { ## Note we do not check NA entries because this may by set automatically if (!is.function(value) && !any(is.na(value))) { ## check class / list of alternatives, if any if (!any(is.na(field$type))) { ## check list of alternatives if (length(field$type) > 1) { if (!is.character(value) || !value %in% field$type) stop(paste("Possible values for", dQuote(field_name), "are:", paste(field$type, collapse = ", "))) ## check class } else if (!inherits(value, field$type)) { stop(paste("Field", dQuote(field_name), "does not inherit from class", field$type)) } } ## apply validity function, if any if (!is.null(field$validity_FUN)) do.call(field$validity_FUN, list(value)) } } .check_for_unknown_fields <- function(n) { ## check for fields not in repository missing <- !.field_exists(n) if (any(missing)) stop(paste("Field(s) not in repository:", paste(n[missing], collapse = ", "))) } ### METHODS (PUBLIC) ## field accessors .entry_exists <- function(name) if (ignore_case) toupper(name) %in% toupper(unlist(sapply(DATA, function(i) i[[index_field]]))) else name %in% unlist(sapply(DATA, function(i) i[[index_field]])) .get_field <- function(name) { if (!.field_exists(name)) stop(paste("Field", dQuote(name), "not in registry.")) META[[name]] } .get_fields <- function() META .get_field_names <- function() names(META) .set_field <- function(name, default = NA, type = NA, is_mandatory = FALSE, is_modifiable = TRUE, validity_FUN = NULL) { ## check permissions if (!PERMISSIONS["set_fields"]) stop("Setting of fields not allowed.") ## check for double entries if (.field_exists(name)) stop(paste("Field", dQuote(name), "already in registry.")) ## possibly, infer type from argment if (!any(is.na(type)) && !(is.character(type))) type <- class(type) ## check mandatory fields if (is_mandatory && !any(is.na(default))) stop("Mandatory fields should have no default.") ## create field entry field <- .make_field(type = type, default = default, is_mandatory = is_mandatory, validity_FUN = validity_FUN) ## check validity of default .check_value("default", field, default) ## add field to meta data META <<- c(META, list(field)) names(META)[length(META)] <<- name ## add (missing) fields to data entries DATA <<- lapply(DATA, function(i) {i[[name]] <- default; i}) } .n_of_entries <- function() length(DATA) ## entry accessors .set_entry <- function(...) { ## check permissions if (!PERMISSIONS["set_entries"]) stop("Setting of entries not allowed.") ## parameter handling l <- list(...) n <- names(l) .check_for_unknown_fields(n) ## check for mandatory fields mandatory_fields <- .get_mandatory_fields() missing_mandatory_fields <- !mandatory_fields %in% n if (any(missing_mandatory_fields)) stop(paste("The following fields are mandatory, but missing:", paste(mandatory_fields[missing_mandatory_fields], collapse = ", "))) ## check for double entries for (i in l[[index_field]]) if (.entry_exists(i)) stop(paste("Entry", dQuote(i), "already in registry.")) ## check defaults and set values, if needed field_defaults <- .get_field_defaults() default_fields <- names(field_defaults) missing_fields <- setdiff(default_fields, n) l[missing_fields] <- field_defaults[missing_fields] ## check field types, and apply field check function, if any. for (f in n) { meta <- .get_field(f) .check_value(f, .get_field(f), l[[f]]) } ## apply entry check function if (!is.null(validity_FUN)) do.call(validity_FUN, list(l)) ## add entry entry <- .make_entry(l) DATA <<- c(DATA, list(entry)) names(DATA)[length(DATA)] <<- l[[index_field]][1] } .get_entries <- function(names = NULL, pattern = NULL) { ## fix search if (!is.null(names)) { if (ignore_case) DATA[intersect(toupper(names), toupper(names(DATA)))] else DATA[intersect(names, names(DATA))] ## grep search } else if (!is.null(pattern)) { pattern_in_entry <- function(x) any(sapply(x, function(i) is.character(i) && length(grep(pattern, i) > 0))) DATA[sapply(DATA, pattern_in_entry)] ## else: return all entries } else DATA } .get_entry_names <- function() { if (length(DATA) < 1) character(0) else names(DATA) } .get_entry <- function(name, stop_if_missing = TRUE) { index <- .get_entry_index(name, stop_if_missing) if (is.null(index)) return(NULL) DATA[[index]] } .delete_entry <- function(name) { ## check permissions if (!PERMISSIONS["delete_entries"]) stop("Deletion of entries not allowed.") ## fetch entry index (this also checks if the entry exists) entry_index <- .get_entry_index(name) ## check sealed entries if (name %in% SEALED_ENTRIES) stop(paste("Deletion of entry", dQuote(name), "not allowed.")) ## delete it DATA[entry_index] <<- NULL } .modify_entry <- function(...) { ## check permissions if (!PERMISSIONS["modify_entries"]) stop("Modifying of entries not allowed.") ## parameter handling l <- list(...) n <- names(l) ## check for index field if (!index_field %in% n) stop(paste("Index field", dQuote(index_field), "missing.")) .check_for_unknown_fields(n) ## determine entry name name <- l[[index_field]][1] ## fetch entry index (this also checks if the entry exists) entry_index <- .get_entry_index(name) ## fetch entry entry <- DATA[[entry_index]] name <- entry[[index_field]][1] for (field in setdiff(n, index_field)) { ## check if field is modifiable field_entry <- .get_field(field) if (!field_entry$is_modifiable) stop(paste("Field", dQuote(field), "is not modifiable.")) ## check if entry and field are sealed if ((name %in% SEALED_ENTRIES) && (field %in% SEALED_FIELDS)) stop(paste("Modification of field", dQuote(field), "in entry", dQuote(name), "not allowed.")) ## check new value value <- l[[field]] .check_value(field, field_entry, value) ## modify entry locally entry[[field]] <- value } ## apply entry check function if (!is.null(validity_FUN)) do.call(validity_FUN, list(entry)) ## modify entry in registry DATA[entry_index] <<- list(entry) } ## get all entries for one field .get_field_entries <- function(field, unlist = TRUE) { if (!.field_exists(field)) stop(paste("Field", dQuote(field), "not in registry.")) ret <- lapply(DATA, function(i) i[[field]]) if (unlist) unlist(ret) else ret } ## permission getters/setters .get_permissions <- function() PERMISSIONS .restrict_permissions <- function(set_entries = TRUE, modify_entries = TRUE, delete_entries = TRUE, set_fields = TRUE) { PERMISSIONS["set_entries"] <<- PERMISSIONS["set_entries"] && set_entries PERMISSIONS["modify_entries"] <<- PERMISSIONS["modify_entries"] && modify_entries PERMISSIONS["delete_entries"] <<- PERMISSIONS["delete_entries"] && delete_entries PERMISSIONS["set_fields"] <<- PERMISSIONS["set_fields"] && set_fields } .seal_entries <- function() { SEALED_ENTRIES <<- .get_entry_names() SEALED_FIELDS <<- .get_field_names() } .get_sealed_entry_names<- function() SEALED_ENTRIES .get_sealed_field_names <- function() SEALED_FIELDS ### CONSTRUCTOR ## create index field .set_field(name = index_field, type = "character", is_mandatory = TRUE, is_modifiable = FALSE) ## return class structure(list(get_field = .FUNCall(.get_field), get_fields = .FUNCall(.get_fields), get_field_names = .FUNCall(.get_field_names), set_field = .FUNCall(.set_field), entry_exists = .FUNCall(.entry_exists), get_entry = .FUNCall(.get_entry), get_entries = .FUNCall(.get_entries), get_entry_names = .FUNCall(.get_entry_names), set_entry = .FUNCall(.set_entry), modify_entry = .FUNCall(.modify_entry), delete_entry = .FUNCall(.delete_entry), n_of_entries = .FUNCall(.n_of_entries), get_field_entries = .FUNCall(.get_field_entries), get_permissions = .FUNCall(.get_permissions), restrict_permissions = .FUNCall(.restrict_permissions), seal_entries = .FUNCall(.seal_entries), get_sealed_entry_names = .FUNCall(.get_sealed_entry_names), get_sealed_field_names = .FUNCall(.get_sealed_field_names) ), class = c(registry_class, "proxy_registry")) } "[[.proxy_registry" <- function(x, i) x$get_entry(i) length.proxy_registry <- function(x) x$n_of_entries() print.proxy_registry <- function(x, ...) { l <- x$n_of_entries() if (l < 1) writeLines(paste("An object of class", dQuote("registry"), "with no entry.")) else if (l == 1) writeLines(paste("An object of class", dQuote("registry"), "with one entry.")) else writeLines(paste("An object of class", dQuote("registry"), "with", l, "entries.")) } print.registry_field <- function(x, ...) writeLines(formatUL(x, label = names(x), ...)) print.registry_entry <- function(x, ...) { x <- .functions_to_characters(x) x[[1]] <- paste(x[[1]], collapse = ", ") writeLines(formatUL(x, label = names(x))) } summary.proxy_registry <- function(object, ...) as.data.frame(object, ...) as.data.frame.proxy_registry <- function(x, ...) do.call(rbind, lapply(x$get_entries(), function(entry) { entry <- .functions_to_characters(entry) data.frame(unclass(entry[-1]), ...) } ) ) .functions_to_characters <- function(x) { ## transform function entries into character strings funs <- sapply(x, inherits, "function") for (field in names(x)[funs]) x[[field]] <- paste(format(x[[field]]), collapse = "") x } proxy/R/seal.R0000755000176200001440000000007410642723444012715 0ustar liggesusers### seal entries of proximity database pr_DB$seal_entries() proxy/R/dist.R0000644000176200001440000003067713213572671012746 0ustar liggesusersdist <- function(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_similarities = TRUE, auto_convert_data_frames = TRUE) { ### PARAMETER HANDLING ## convenience hack to allow dist(x, "method") if ((is.function(y) || is.character(y)) && is.null(method)) { method <- y y <- NULL } ## transform data frame into matrix iff all columns are atomic and either numeric (integer or double) or logical or complex is.n_l_c <- function(x) all(sapply(x, is.numeric)) || all(sapply(x, is.logical)) || all(sapply(x, is.complex)) if (is.data.frame(x) && auto_convert_data_frames && is.n_l_c(x)) x <- as.matrix(x) if (is.data.frame(y) && !is.null(y) && auto_convert_data_frames && is.n_l_c(y)) y <- as.matrix(y) ## vector handling if (is.vector(x) && is.atomic(x)) x <- as.matrix(x) if (!is.null(y) && is.vector(y) && is.atomic(y)) y <- as.matrix(y) ## method lookup reg_entry <- NULL if (is.null(method)) method <- if (is.data.frame(x)) "Gower" else if (is.logical(x)) "Jaccard" else "Euclidean" if (!is.function(method)) reg_entry <- if (inherits(method, "proxy_registry_entry")) method else pr_DB$get_entry(method) ## some checks if (!is.data.frame(x) && !is.matrix(x) && !is.list(x)) stop("Can only handle data frames, vectors, matrices, and lists!") if ( is.data.frame(x) && !by_rows) stop("Cannot transpose mixed data frames") if (!is.null(y)) { if (is.data.frame(x) && !is.data.frame(y) || is.matrix(x) && !is.matrix(y) || is.list(x) && !is.list(y)) stop("x and y must be of same type.") if (is.matrix(x) && is.matrix(y) || is.data.frame(x) && is.data.frame(y)) if (by_rows && (ncol(x) != ncol(y))) stop("x and y must be conform in columns.") else if (!by_rows && (nrow(x) != nrow(y))) stop("x and y must be conform in rows.") } ### PREPROCESS params <- list(...) if (!is.null(reg_entry)) { if(is.function(reg_entry$PREFUN) || is.character(reg_entry$PREFUN)) { tmp <- do.call(reg_entry$PREFUN, c(list(x, y, pairwise, params, reg_entry))) if (!is.null(tmp)) { x <- tmp$x y <- tmp$y pairwise <- tmp$pairwise params <- tmp$p reg_entry <- tmp$reg_entry } } method <- reg_entry$FUN } ## helper function for calling the C-level loops .proxy_external <- function(CFUN, x, y) do.call(".External", c(list(CFUN, x, y, pairwise, if (!is.function(method)) get(method) else method), params ) ) result <- ### PASS-THROUGH-cases if (!is.null(reg_entry) && !reg_entry$loop) { if (!by_rows && !is.list(x)) { x <- t(x) if (!is.null(y)) y <- t(y) } if (reg_entry$C_FUN) { do.call(".Call", c(list(method), list(x), list(y), pairwise, params, list(PACKAGE = reg_entry$PACKAGE))) } else ## user functions need not implement pairwise do.call(method, c(list(x), list(y), params), envir = asNamespace(reg_entry$PACKAGE)) } else if (is.null(y)) { ### LOOP WORKHORSE for auto-proximities ## transpose data for column-wise loop if (!by_rows && !is.list(x)) x <- t(x) if (is.list(x) && !is.null(reg_entry) && reg_entry$abcd) x <- do.call("rbind", x) if (is.matrix(x) && !is.null(reg_entry) && reg_entry$abcd) ## binary matrix .proxy_external(R_apply_dist_binary_matrix, x != 0, NULL) else if (is.matrix(x)) ## real, integer matrix .proxy_external(R_apply_dist_matrix, x, NULL) else if (is.list(x) && !(is.data.frame(x) && by_rows)) ## list .proxy_external(R_apply_dist_list, x, NULL) else ## data frame (by rows) .proxy_external(R_apply_dist_data_frame, x, NULL) } else { ### LOOP WORKHORSE for cross-proximities ## transpose data for column-wise loop if (!by_rows && !is.list(x)) { x <- t(x) y <- t(y) } if (is.list(x) && !is.null(reg_entry) && reg_entry$abcd) { x <- do.call("rbind", x) y <- do.call("rbind", x) } if (is.matrix(x) && !is.null(reg_entry) && reg_entry$abcd) ## binary matrices .proxy_external(R_apply_dist_binary_matrix, x != 0, y != 0) else if (is.matrix(x)) ## real, integer matrices .proxy_external(R_apply_dist_matrix, x, y) else if (is.list(x) && !(is.data.frame(x) && by_rows)) ## lists .proxy_external(R_apply_dist_list, x, y) else ## data frames (by rows) .proxy_external(R_apply_dist_data_frame, x, y) } ### set col/rownames for cross-proximity-objects (if needed) if (is.matrix(result) && is.null(dimnames(result))) if (is.list(x) && !is.data.frame(x)) { rownames(result) <- names(x) colnames(result) <- names(y) } else if (by_rows) { rownames(result) <- rownames(x) colnames(result) <- rownames(y) } else { rownames(result) <- colnames(x) colnames(result) <- colnames(y) } ### POSTPROCESS if (!is.null(reg_entry)) { if (is.function(reg_entry$POSTFUN) || is.character(reg_entry$POSTFUN)) result <- do.call(reg_entry$POSTFUN, c(list(result, params))) if (!reg_entry$distance && !(is.logical(convert_similarities) && !convert_similarities)) { result <- if (is.function(convert_similarities) || is.character(convert_similarities)) do.call(convert_similarities, list(result)) else if (is.null(reg_entry$convert)) pr_simil2dist(result) else do.call(reg_entry$convert, list(result)) } method <- reg_entry$names[1] } ### RETURN DIST-OBJECT result <- if (is.matrix(result)) structure(result, class = "crossdist") else if (inherits(result, "dist")) structure(result, Diag = diag, Upper = upper) else structure(result, class = "pairdist") structure(result, method = if (is.character(method)) method else if (missing(method)) deparse(substitute(y)) else deparse(substitute(method)), call = match.call()) } simil <- function(x, y = NULL, method = NULL, ..., diag = FALSE, upper = FALSE, pairwise = FALSE, by_rows = TRUE, convert_distances = TRUE, auto_convert_data_frames = TRUE) { ## convenience to allow dists(x, "method") if ((is.function(y) || is.character(y)) && is.null(method)) { method <- y y <- NULL } if (is.null(method)) method <- if (is.data.frame(x)) "Gower" else if (is.logical(x)) "Jaccard" else "correlation" ret <- dist(x, y, method, ..., diag = diag, upper = upper, pairwise = pairwise, by_rows = by_rows, convert_similarities = FALSE, auto_convert_data_frames = auto_convert_data_frames) ## possibly convert to similarity reg_entry <- pr_DB$get_entry(attr(ret, "method"), stop_if_missing = FALSE) if (!is.null(reg_entry)) { if (reg_entry$distance && !(is.logical(convert_distances) && !convert_distances)) { ret <- if (is.function(convert_distances) || is.character(convert_distances)) do.call(convert_distances, list(ret)) else if (is.null(reg_entry$convert)) pr_simil2dist(ret) else do.call(reg_entry$convert, list(ret)) } } class(ret) <- unique(c(if (inherits(ret, "crossdist")) "crosssimil" else "simil", class(ret))) ret } # note that a simil object must always also be a dist # object for method dispatch as.simil <- function(x, FUN = NULL) { if (inherits(x, c("simil", "crosssimil"))) x else if (inherits(x, c("dist", "crossdist"))) { class(x) <- if (inherits(x, "dist")) c("simil", class(x)) else c("crosssimil", setdiff(class(x), "crossdist")) if (!is.null(FUN)) FUN(x) else { reg_entry <- NULL if (!is.null(attr(x, "method"))) reg_entry <- pr_DB$get_entry(attr(x, "method"), stop_if_missing = FALSE) if (!is.null(reg_entry) && !is.null(reg_entry$convert)) do.call(reg_entry$convert, list(x)) else pr_dist2simil(x) } } else structure(stats::as.dist(x), class = c("simil", "dist")) } as.dist <- function(x, FUN = NULL) { if (inherits(x, c("simil", "crosssimil"))) { class(x) <- if (inherits(x, "simil")) setdiff(class(x), "simil") else c("crossdist", setdiff(class(x), "crosssimil")) if (!is.null(FUN)) FUN(x) else { reg_entry <- NULL if (!is.null(attr(x, "method"))) reg_entry <- pr_DB$get_entry(attr(x, "method"), stop_if_missing = FALSE) if (!is.null(reg_entry) && !is.null(reg_entry$convert)) do.call(reg_entry$convert, list(x)) else pr_simil2dist(x) } } else if (inherits(x, c("dist", "crossdist"))) x else stats::as.dist(x) } ## as we do not know if the object is the result of some ## user-defined transformation the values of ## s(x,x) are not defined. ## we need to copy stats::as.matrix.dist() since the use of ::: is deprecated: as.matrix <- function(x, ...) base::as.matrix(x, ...) as_matrix_dist <- function (x, ...) { size <- attr(x, "Size") df <- matrix(0, size, size) df[row(df) > col(df)] <- x df <- df + t(df) labels <- attr(x, "Labels") dimnames(df) <- if (is.null(labels)) list(seq_len(size), seq_len(size)) else list(labels, labels) df } as.matrix.simil <- function(x, diag = NA, ...) { x <- as_matrix_dist(x) diag(x) <- diag x } ## however, it seems reasonable to assume that d(x,x)=0, ## which is also the default in stats. as.matrix.dist <- function(x, diag = 0, ...) { x <- as_matrix_dist(x) diag(x) <- diag x } print.crossdist <- print.crosssimil <- function (x, digits = getOption("digits"), justify = "none", right = TRUE, ...) { if (length(x) > 0) { m <- as.matrix(x) cf <- format(m, digits = digits, justify = justify) print(cf, quote = FALSE, right = right, ...) } else { cat(data.class(x), "(0)\n", sep = "") } invisible(x) } print.pairdist <- function(x, ...) { print(as.vector(x), ...) invisible(x) } print.simil <- function (x, diag = NULL, upper = NULL, digits = getOption("digits"), justify = "none", right = TRUE, ...) { if (length(x)) { if (is.null(diag)) diag <- if (is.null(a <- attr(x, "Diag"))) FALSE else a if (is.null(upper)) upper <- if (is.null(a <- attr(x, "Upper"))) FALSE else a m <- as.matrix(x) if (diag) diag(m) <- NA cf <- format(m, digits = digits, justify = justify) if (!upper) cf[row(cf) < col(cf)] <- "" if (!diag) cf[row(cf) == col(cf)] <- "" print(if (diag || upper) cf else cf[-1, -attr(x, "Size"), drop = FALSE], quote = FALSE, right = right, ...) } else { cat(data.class(x), "(0)\n", sep = "") } invisible(x) } pr_simil2dist <- function(x) 1 - abs(x) pr_dist2simil <- function(x) 1 / (1 + x) ### proxy/R/database.R0000755000176200001440000000561512015644530013535 0ustar liggesusers###################### ### proximity database ###################### ### check functions .function_or_character <- function(x) { if (!is.character(x) && !is.function(x)) stop("Need function or function name.") } .abcd_and_binary <- function(x) { if (x$abcd && (x$type != "binary")) stop(paste(dQuote("abcd"), "mode only available for binary measures.")) } ### create registry pr_DB <- registry(registry_class = "pr_DB", entry_class = "proxy_registry_entry", validity_FUN = ".abcd_and_binary") ## create fields pr_DB$set_field("FUN", is_mandatory = TRUE, validity_FUN = ".function_or_character") pr_DB$set_field("distance", type = "logical", default = TRUE) pr_DB$set_field("PREFUN", validity_FUN = ".function_or_character") pr_DB$set_field("POSTFUN", validity_FUN = ".function_or_character") pr_DB$set_field("convert", validity_FUN = ".function_or_character") pr_DB$set_field("type", type = c("binary", "nominal", "ordinal", "metric", "other"), default = "other") pr_DB$set_field("loop", type = "logical", default = TRUE) pr_DB$set_field("C_FUN", type = "logical", default = FALSE) pr_DB$set_field("PACKAGE", type = "character", default = "proxy") pr_DB$set_field("abcd", type = "logical", default = FALSE) pr_DB$set_field("formula", type = "character") pr_DB$set_field("reference", type = "character") pr_DB$set_field("description", type = "character") ### summary and print methods summary.pr_DB <- function(object, verbosity = c("short", "long"), ...) { if (length(object) < 1) return(object) verbosity <- match.arg(verbosity) object <- switch(verbosity, short = list(names = object$get_field_entries("names", unlist = FALSE), distance = object$get_field_entries("distance")), long = list(names = object$get_field_entries("names", unlist = FALSE), distance = object$get_field_entries("distance"), type = object$get_field_entries("type"), formula = object$get_field_entries("formula")) ) structure(object, class = "summary.pr_DB") } print.summary.pr_DB <- function(x, ...) { distance <- c("Similarity", "Distance")[x[[2]] + 1] if (length(x) > 2) x[[3]][is.na(x[[3]])] <- "other" for (i in unique(distance)) { ind <- which(distance == i) if (length(ind) > 0) { writeLines(paste("*", i, "measures:")) if (length(x) > 2) { for (k in ind) writeLines(paste(" ", paste(x[[1]][[k]], collapse = "/"), " (", x[[3]][k], ") = ", x[[4]][k], sep = "")) } else { tmp <- sort(sapply(x[[1]][ind], function(i) i[1])) writeLines(strwrap(paste(tmp, collapse = ", "))) } } writeLines("") } } proxy/MD50000644000176200001440000000324714057514472011762 0ustar liggesusers80c8637e466d5b95793133a6bd4e62c3 *DESCRIPTION e499bda510adfcd4965b84687f11f0e6 *NAMESPACE efa5c729c0744f2fdb2a9f970f982ef7 *R/database.R 1c71957d655b0e93bac59105b9686697 *R/dissimilarities.R 81564934699e3d2ea653bb96ff9557b6 *R/dist.R cdaabc587c5d0131b79932da2032f0c3 *R/registry.R 71b44c779ee8e97b5af57b7ed472d448 *R/seal.R 36838e38efb2896b57b45ed2e1ab7147 *R/similarities.R 93c8ed420c2081297b7bc2d15a910af4 *R/util.R 34d2a9328c715649558d9e4803e1137d *TODO 240f23eec5cb81f75966aec043b43fc5 *build/vignette.rds 90a7d63359a21aab7adbc559ba34d54a *inst/NEWS.Rd bcc756854674661ee0886c0c7d3b6c18 *inst/doc/overview.R ec24a75149c7ad35c68b86f731dd0d7b *inst/doc/overview.Rnw 188eac3a63bb199f41fc7e81f182a6eb *inst/doc/overview.pdf 33df2d5efe6540e5285f14efd04f9361 *man/dist.Rd e00843cd697a1c925beea8be1463d54a *man/registry.Rd d9e34eea7ac668a0dc3798ec3e476e9f *man/rowSums.dist.Rd ea3fccbab55400cec52145ae1cd6a27e *src/apply.c b4a706d255fa2aeb729a733b122586af *src/arrayIndex.c 27a3ea78d0556f8e4bf560a214ec2aad *src/distance.c 5f5f6926767c934be5dee4d32e6e1e98 *src/dll.c bc14e759c2f7a7cab49e46df2199d053 *src/util.c 13acd6f763d4de9893a20051e4be759a *tests/apply.R 78e117321ea3d293909a0319222c3c90 *tests/apply.Rout.save 14e496e16fd3d9b13fa15009bfe815bf *tests/distance.R eae066df7e690fa00bbae6c835ff78dd *tests/distance.Rout.save 27b436b4f387638e82c91b50af2a6722 *tests/distcalls.R 636ed74a3b4e8ea99034edf277fb2ece *tests/distcalls.Rout.save 4f1d3a7ca18b4ac25cc3fc1b3b9707d1 *tests/registry.R bf59ffa18d3b12a25ccd66c382f531cf *tests/registry.Rout.save fd828b8c89d8e47753b9f79a2dc73957 *tests/util.R 7e8a4f5895205fb8770d34499c8796f6 *tests/util.Rout.save ec24a75149c7ad35c68b86f731dd0d7b *vignettes/overview.Rnw proxy/inst/0000755000176200001440000000000014057153366012422 5ustar liggesusersproxy/inst/doc/0000755000176200001440000000000014057153366013167 5ustar liggesusersproxy/inst/doc/overview.Rnw0000755000176200001440000000145711714440422015524 0ustar liggesusers\documentclass{article} \title{Proximity measures in the \texttt{proxy} package for {\sf R}} \author{David Meyer} %\VignetteIndexEntry{Proximity measures in the proxy package for R} %\VignetteDepends{proxy} %\VignetteKeywords{similarities,dissimilarities,distance} %\VignettePackage{proxy} <>= library(proxy) x <- summary(pr_DB, "long") FUN <- function(index) { for (i in which(index)) { writeLines(sprintf("Aliases: %s", paste(x$names[[i]], collapse = ", "))) writeLines(sprintf("Type : %s", x$type[i])) writeLines(sprintf("Formula: %s\n", x$formula[i])) } } @ \begin{document} \maketitle \section{Similarities} <>= FUN(x$distance == FALSE) @ \section{Dissimilarities} <>= FUN(x$distance == TRUE) @ \end{document} proxy/inst/doc/overview.R0000644000176200001440000000151214057153366015157 0ustar liggesusers### R code from vignette source 'overview.Rnw' ################################################### ### code chunk number 1: preliminaries ################################################### library(proxy) x <- summary(pr_DB, "long") FUN <- function(index) { for (i in which(index)) { writeLines(sprintf("Aliases: %s", paste(x$names[[i]], collapse = ", "))) writeLines(sprintf("Type : %s", x$type[i])) writeLines(sprintf("Formula: %s\n", x$formula[i])) } } ################################################### ### code chunk number 2: overview.Rnw:24-25 ################################################### FUN(x$distance == FALSE) ################################################### ### code chunk number 3: overview.Rnw:29-30 ################################################### FUN(x$distance == TRUE) proxy/inst/doc/overview.pdf0000644000176200001440000021457314057153366015544 0ustar liggesusers%PDF-1.5 % 3 0 obj << /Length 677 /Filter /FlateDecode >> stream xV0W䘨8{kҊ bs[`[wlO "&]v%]A$^vA “ XMes_ |.CQl@-b74g}RɄ^d- wes֦vԅs'Y--e$f3 x:* :Y?[*V=nQyu[Pg0u. %3bi7M|nB endstream endobj 15 0 obj << /Length 587 /Filter /FlateDecode >> stream xW[o0~ϯ1Q a&]&EZ^6 mԆvh`SLH&pdsw.63p-ߵ]#83X#3\>l8=}$!{tCZZ%DDBt0ɘsؘK+(ŊaN+"\!Yd;F`xwrFX Ė8=)sXV4>>%\)ai%[kXqOXTO PʁF lOQ;G]u>hz!D=K*Fi"V|Ha4#)4`_QVϚ\X@4n\) .'% ,B/ϕSND:q܎әJgX,ګ/**k2sm ?A 5OScΧ \瘯Z >9úi(7𼨝eFc6+xcoٻnޭYTJljnQy%TXU܀u]z-T,?jި 1~'x7[xK0,O5w}q9[t:m endstream endobj 18 0 obj << /Length 643 /Filter /FlateDecode >> stream xW_o0ϧ#&nk&U4e w6snhd;ߟ}NNE{3&,hfI[@1Po³pI(21.]ݠ6ϣ32s'\{kXPmV]:E]wʌA:M7waS$T=z.<'1` R[R_)9&p6ݼV~!=[IB_1js NO5O%n]P^ʥΥ#m~t`Eld10#H;`.p9: Z)W8sH'ꔦ_ao#'l "|@m#JZr>õao "/h8dg1n3kȈ'?BUgJI*S[ORI"`h9>37.#?3cInN:p2".nx۸{cDb ^|>#B> TO8WBCV`v!Ug:RV.5R'ǝyQ&DtAJgv_Z9N7/ Vj=);zZ: endstream endobj 21 0 obj << /Length 632 /Filter /FlateDecode >> stream xXMs0+t$Ml#[ɔ)NJ0M{%٬C{vVoW+NP|l׌;s[,zLBBgO̅\4܆hfЖV߄ m.FҬQEs%^wUs|d f؎wpkZS?Цc=RxM MVc9(u8JzgX!! =| 9$T̬#0+կO'pr3)6iXIz 9< wpn T(4?&x[r$kNZD܋ń"@o`E(Y+5DTg-x_NY?2Ie:w!-uNI9az jBUZ` I>.ϔly19k; W)F7L )cۨ!uM#?P*?P1B #RIH*WP3&iӍUӷ &ˬ9'[65]rO˯uR\aEmTbGb܈mG> stream xVMo@+|4-65*P`C *) $w֞Ml 4J13o7cEvV膁Xbv܊R쉐XR5=>z _Dc9!GH8B$a[#9U}K'B~޸=,r1RcR5f|N|-!)ܙ銁N '/JؐؔLF%PWfDݐܾ3Ԓ2U?FDBȦDf )9>Ir`RRR:DSPJ%/?BζeVzf3uPO`K:In^/r!pJFdƚ\+)4gLV;0IZfވ\+Rx0~0Ur/o>z_\Ҽ#2wOh@0n}xkFumlfѫ!kFq1ymJX^#D_oLkbcgZ\ tv؎Es`k j endstream endobj 32 0 obj << /Length1 1526 /Length2 7683 /Length3 0 /Length 8699 /Filter /FlateDecode >> stream xڍT6,RJ Hwt00C J4!  !] s[~5:\+qEj2 yquH/3.>#@bY"p D@"@ O E gqTp+.,jk?K1$,,; q-5KnG% C!H`C"Dxx<<<-].9P@ qqX~[:B0eA]u6HK!p׻ 75p9@GY V l nU l #,^P- 4THN%W%qon YZ #=W )íe8WrPؽxܬ @6HX9neBLB~ ((@Oϯ^N;~>N' rj ] ~>t@k( B~gw}^y7(K)qaOF p w Ѵ?r6n?%1ߵw+rS ?|?6vܖP_wuC n j32wc }PW'ZޯAM+ף,z'.{Ky8akx..^wW|>Ix0@ޥl.S@# xtF‚˿ѝl<@~P7|rAO<.w\<ȿu7w~i Owz n:Zc^5H}3+ES餞6ey ZWGVq:򷥫pJ88540qv"%>PR::NQh(Ӏq̯ H.Zl'cnT8pv |\|/umd4E;y?#B>S\(>%=>%sM .tfblp L<`a a.VI`)~I}qqHobQΞ^3ᅂ1^- QҸ\XV<Օ4 h?R$XCނ[Img9k Um2^ V1jg}:J'd<`FIɟ8fҬh}uGkZɋ@T Ew+p{\,LϦG]6JE>cY=zu;ekhK%~N)`<ugD {tIÅAzHOP&:1kWtX`"#ˆݳtc:P0T? l *FI"b ěM/0f9_e9_hW o>սEʹҬXN$-uc:hɵnҥ|čYZrM1W_ܺ\cRS ^8}Q@~A)3*ѦƯltz*Z,7PkѲ(YZ|qc}xj;bDBA8.f4f@R*{xZiDr>E8 +{/g6R IȪ7j g) "> 0~9OS>ڕ=LhY3N~iy!(^Jh$@)e͂I{ EX]Q2(OoSki Z}0l]Y2[YM.=ki[ed;Ǜs'1O1V{ 2]j#t Ā`@QX5?-3& v[Fݸz= `?jWGb<0ރ%##}r{? >llӨ}P1ܩ o$8,o ^θFٳjn|c{ XeyJ~@y.ྷʃ8Bam$2kb$n&W졤4츱sdek25)QF3"J,TF0cWK-"zJR#W@aQ&\KE[F=vp\ØZraF!1'?J9LÉHeҭԲ "Ŗt~ymr#i{e??SB)w*"\: +nOxڔN*qȼzY邎l7PnɴÖ2qA.NYfo'e] ѿߞƴ0 |.Gtcqs<c 8aJP暕u7Y+{ $سr WIz)H}FSK4Ώg>?؆_C#(g6OoxD`j$:O/ { V6,Քaqdk8y*%}`A'}< қ1h(/(pm('`xI!<\6.u= ǰlGQ'J2K{=<9MfC5*`hC16laO J;Hx4P?5 7$E%01o" +t8mfn^]8ۚ~٤=c^)}P'1/o͎yT^Z|R,sb(Me!nQpN$Bo2KŻؕ/^-E4_)=ԍkC\.G#zMA߻$>J}h7Fvz0#Bm,IIpxƺB1ю|;cgoyD3bcEהx~_W]B,\.]Ht|q6@Ќ>-+:}*sKz%7!&T/VC),Z>zCiX{Kk,‚>);`XgVDu1*HI;66{B dדSP$a$Ɨ(ߛʯX{7vHg^|R"F F{^Q^̗\#c==vLkX3>>6摹eFO?=t\-$hB"kpcA#*3,wF{. ]WOJ|b)}7uS#Z?C#~)Ik˞jwJtOY e,1&وlU:)WSoNr *F"[DE'c.1 )ÏS<'_ P)&p6'ڮ uۻq~>{ɩor%rۅޏ(V"V^R[U\G~o|<e+ 9Y%҇giBjx e.Owj K6d\ct{&_oiӔ=@μk:߳n(}óo>2 EFVCmQR+VT#G\e 1cg֏bUgQ)J+#Ć@f{ڰ:\8>SA-}ܨ#u7:Cix{2|S,Qu3b#6GܚfwIZ$d!-g"<'R csl{6N!R9.Xhs)+~x(K݄ʌ49Z|Qr%/r`5 iGZi/L|(ͻ2qڪѯx̤j|9*˱k_0E ضsIB1 v.gGh&>;0j!x[N %I.AZ%,χ{sCãQBa%ѝeD8mǗuohx0FbF="ZHr^)=FS^9,PB91c([Ic%eTZ*śkWL\B捇KpT苎8pHyvM-1Ra'PA(ڨ$.]B/> <{X8b*Nz%)b;7Z˴7qBJOYݍ1A쟘{N'F@#Q 4Ox ;0BZCUq٢VXM'Q `l3,]e~g[7WxwtD< ¾LXW6 umpzZa4D_|1m*6UNGW4)\|>]FkǑ,G8ݨS(WqdHx:` w=@k)\Mna3ђ<*cRQ(U#͓-H:9(R i3u\@CQw~SVS'yB}rof  2cj}-y[\H*~xa/-!p9d3#jVrwn&`t

ժjx9Kz2\i9L"*"ɾ0m̼*cg+c~أ {*eQH74fDik7tXp;%5Y"wŕ+0oHv R[յbP˫]Y*Hk?TY+yـdAJ?u'֋_E\| XIj(l<@Uc0VV#5B*=2;NAۍf *m-iUo3oK ˳U .ŇՙwZ&;BņivI7#>Xe}vuB[]wI$IF]s t=j>J[oK#ڜ6eՂnv0G?kMlbǃޡ4fQ갴M8TP|ʏQ5{cw]'*cpmxrʝ^߲~D-8GWDD%:vO0zeU{>I'cQ2vsVjaGMhH3eDe2+p 7a3 kOVyChWO5l WޱjV Z֎lD>-OĮۑZe^p&níkG-@b$/]iwԳ>ƭh(3obwchc rk8(>/b~R.kNL&!:rCIE_F -+n#ࡰ?dM)`%&Qke@ Zxk̊Zy\e}7[ ֗ZeL',RA'kl"k;tļ "]Txbo=++C?1e%)Ͻ 2dedXwMoqM\Жe(Y}"Ξ 3"dS"/׽Zb@q+_'V5{Q|㬆T^4PSkRBrigԚN,܄=|" ?mcoّe-fa ]\mMejHdUI5J:GAJQ"P"lERP-5珥=etf "!μepr Gx endstream endobj 34 0 obj << /Length1 1433 /Length2 7339 /Length3 0 /Length 8316 /Filter /FlateDecode >> stream xڍT]6,) RCw )Cw7 080t R- JҍJ Z߷f{ξk}9{-xM= ?__AX Hw(._$;&P8PT //")P{B5ǢpABPwk `D\ H .w;ځa}G vI'Uˋ΋@:Jsp('~7 @`u G8HA p= v"kE}6 /  C>P# A(o7 Mw`O0# w ݞru~;)+..8w}P$}٧po w݄+!QUrsbm;+  pku=!ߎZx@ jBpd!wz{ey'/{72T?!~0?[dwC.bU_ӿ*[w#r ~a~YB;M[f.P߄;z@q7CZM=( a>F; ׁR_)A;๻ݍӻN\Jp;H%Y?,C GBwH7*,sީ Bb? 億x!(?_N!$[Q{KEy\Q<{=haUXvo~dsn˥+QޑmZ\ k7MhX<䒜yu+{>ZݨQǿ*gkR4c9IɈ$>&8>'W{Ņ'Xg"|w񝁀{;3%-1VŴ_qBwd}CG[PxCWZk@8u/ZbUI5Q:U%BA~Ĩ9fNK-`⡩vpce^g)g V;dzdhJN )X!vc*#9O~?/=B"@4̋26LDcA>M3t9K/t%!ua|+伊%> ߄g~1*wOW D[8 2 mՅˇEto};)+"0:?(x@4<vwTe.\[]yORCW^ܥ,l=%#g-g ˒??nn>c y &북7 r|WuMZe<+(nHǦb^2L-KF8D7.zTOBw1 JK24 Ho:͔(UnQDz,Q&enƼ(ewtJuo]x_uZ4e*-=*iDV 8b"LBq춈bpJNWLG##^EFr3*GFXɋ5]6|O UzO;^ )hk}{rJe:(hы$~mBWrxs8&^2_⏓z+e|&ci`);|{0N]2T62GփX?=֑NIFT>Imj @{ ^*ÂתʫN|(nX,-7<ӭOUsT^n<_-;T;27Od2i rHc:H4bλNcB8JL^m<;A:۳.mq Uffw<+8TӬ,IjT@wݔK ّ"i-{:݊}.^SO"55F9>{,^f4:-Z?uPtۥN@]S8 1~v8Af;tp[Gw7s؋wdUMM6d(Qٽ1D^YR5ŧZG9ݹĸ8F[OފVO ]c=m)^-YT7O}EK9auP%s~JlCM޶"1MpڔI3r& J57w֔ |!UCxrL uļ#iB][ޗq6tGw#D  񤓶`u]_={\4>/N>c5ZRﲚ0%v~Odn/J5}$V2ލ>i(MCS^Mw})Y*s Cejɚs_15Ct\\Eь!]3KhJ!8À=ݛ]I c M_w03a^p*5Y G:%tbq4%cc"de kPTq}N$ ^H픞yR1`MdܜK{p[45䮪ݓWaMjMÄx=! XU ||ZWsc>JѼz!@p99}sa*rI2Q& zc뀫F8+i>1*8C/c$"^}Sk!\`c_J8Ϧ$"*OyM۲}OrL4`T gacZM×{ EL6ӣpS?yA'mM3*,:ha*x\ӎ_R.R}aW~ȌӶS ̆U׳O<|ɋG=ܺ;|-'3/w >96*#+!!iAFnFɥylt(G}6"]̂L=V+|X 5 JQVlȴ% BWUdCF®6n_{ v* |)EsO)Ԕ|TVNV7 ,$DN߬ ~;[z^,GY_*Jbsm8:91^YkX}iRV?.n\SE :ȅ.A=w.u|!6E>Nx|"2nΞ^" QE`4[34ۈD-&zU8Qz ؄yq ^;w [_X*{m#aRv'vWH G:ZLڮߙovd?f!}h6g1ГM-DmզZ͉P?6~dàۼNX;`:9C p8qy=AބXS/9 *a2f%G1?H7n>٥wcϻWt$-~p/ikeq3lZb-[KoqEcG?k/=jAc]$&>F/8[OndL,;ㅊU҆rӧӬ:F4 7GN FEw8zRUoћD)>ӓ@]k:w_/ |*j+!$c %lNQ~_G9E L*qhҲ~R{UxX8Y+gYijZ)sy [ᢽM{#l';]A {#e)nMrww7+V|%n]'hN=+dBҳ4gd9$l4[osnBVv&SMƊr[ot$[Oƙ_d7N/,6_ZM$b>HmP}1!rs=-SWV›x-O9ڷ{HN*oMح:,aˆ7sFAXH%UZ*CFe weg@ZM:$q8Oa 94^sXcE̴F~UaA|;'N[XϠvonL ۼZ{wąaa3I W%4z$oAp"}%>`d}> =Y(Ʃ@>^g7C/h8sc 18q\C)> aaAƅGŁ#Ig2 {g(n}>=.b1B arRd)Jѧ .kr4?XTۿeAHG9m#7ufzy0ˏH1mh+~ڥªO7wǙ7t{,ycun #O\S"[픻>ڻ 9ͰeO~]%&2C@5בJ$ς HN4AfGlu>(w99Vhjн]i\E'"偬iFPҘ;pd5A'.S7YКdOY,r̺ [kZ>FʎzZ ù_ 0 .\\GH&j*qv 1Np'-,{|zwV!(z@%jݑ:)ciصsF{]*T %ͽݖ z}1#ئh"yfnSy 0S_ܩ`]."+V|{c;kY@yf-OyZ [Yxfd7sA?'#M{Zʑ@#)u{0үswymINSJ-g`SOOR(v"G7[$‰^S&v+@\G G{Kpd#j -1P 2+[[̤!L{MjvDun9 | |FPX]3&G Ȋخ0*6j /&N֟ǘxy?mJ,톳(m?jۛ)}m|hG BXYD32f3LIv+2 4rDq2QX yXJdwȉWJxꨆD?o}fU+6ƑeU-C9I4Ƽn va/e 9|ڝMt]%5!mɽäܜxm"&b ^ts*ѢҀBc#3DGa8O?ӕKIε+wU4˯(껛yvnnW2~*Q_/]k9E^mD {U̶\U7MɁmj."ٓB̽]9hMvi7:pP;&6<u}) ҖJծ_w={'.ؚ> r V){jmeotQcf%3'k&7=AsUy&Ѣg)[4.8%pUP`r&(yu"<årl~_8wد+dhI72șD@5 endstream endobj 36 0 obj << /Length1 1607 /Length2 8600 /Length3 0 /Length 9660 /Filter /FlateDecode >> stream xڍP.%`Hp\;$2 0'Hp n5KKAݻ{5U|_^CSEppEZ< fdԁ pg#T_Y8xɁGw'?W ", |;WBV?} vY VRs|-_xGgi Rp9@="0^/]OSGj[/S1E_#ҭOѽhv IhݨEӌ3O14؈ /Tٱ}Gz\x.:w=fxlHIzN<:%B1U\M![sxc⹥u =1abeyLɍJ&jU)>?ʊP3z^^N@ Gd:s[ YRW 9 ?<z&biCQ-ݦY~L[#V Ây?nIT&t~>þi֔v>fye!<\SW+q%cdI<"G ͦR3s4]H{Pt*\>ZƭD|>"vukM4DO(m<яUPJN9ɺ]zVɹr :'V(,hJIclCTvS 1l}&F͉5!H:6٢71YltjX/cIմݽitS'Zhlf aV=O*TNoOZTlP11@6ǡ{kPT0*Iy$n Oو$V?OF qGpj)Uhd'}3FI*Ma37E>k8_GƨZxoquN]Seg0R3kE RN"e;ШB}^Ų#%5p 6=ED6S_,ft4SD%g $W|Tl9F/WoAiŅM)Dӫ(X2Ec5 3פm!zd2tD ӮWkLNjfǦRYbK (5i"_]ZY&5tH޹^m"ʡs1ח+ ܰ 0AʭOh*~gxlq>BmS#&[*0Nk$'[X8^`VxG&vDwPݦ:EY٭ (SW|V.aRD,=FɬhT{v6H=>ƒC m_xZqF@vve%|/U9C`h "FSP.Pod^-|6NdWG|G5Ho^ǫbz*ϹRs\%WdH㜱r/L J[NցEUܴoE;cUo{]9ҷ#y7K,lYіfyyҪQЂ*Zˆ{k)pqJN%<@^%{N6jSVǣ 0}BQfE_zoa# Y֙u`zL_@4K?nB8J+.U)<,&f Dj)U &zo[RǬ T@b*[J",#pǶ= 퐢"q/њf U>ݤ1( ͳ{Z& ]K]C4d|7OYk o,q68'N;"exCvZu+;_G=` Xܥ~M5PusKqg* G]w40 ӫbB/垂άyHN"zEYs11F#֖/YD:<{?Уh:_#1t͟[#Fs|rFO~D jЦY<^W1諼c6h| nuYNr55q k}Qt,y?E0Z٬rHۄԈβBN8aet>ȍ ] :$CZ1=p>9ppj=ap-R;IKz=^)j_Qw;} -ylx:jnY ֍ޓLB6*g17TGo3)>E8"|lw1sfMM>M,„яl"Xkl6q^ DhopEt<4J WrQHl)z"]Wj|\kE {ۥ=vhd_6ף"}h.K0}UZ>_ .-%#&_Y~DG׽qt-f'{)P"D"r1Ru4ay-nԸ\\C')isREB˨U7wkV/+Y`zuoޘDϣgd|状qa?Y}FT}ђ}ҺjsmjRƥ-u 0Fp8A;M#>YQ!&OӪos*w2 @e|G91_(@cAMP]Gisji OfZ k&V)nekr53MUAq4E a > V@ ŸI1{. C=|B|!,"w[Ml4)cu{\@Jdd@B3CmNT3#:%&:eJ084&Ec;]g~COYuʊm%?X s[墉XJ+[%ۖ*|li2O^{\Hf^UH}(Mg68,!98gcB:H.Y|SDld`ਦdkT¦s<3q#?RC%-t++,9g: GG(. xT>EUw4zfa}.5`gZ,+42H'35! @- ΁x5_ -jwH.9$}wOrmK:z֝7}}&D(_O,48U)Xf`צBjxO3o΃x (dOTrXVT#6rF;1^Iwv=ɖ,".Ti_k^; >rXg%QҠd$2fUGŶlmP[KaK f54J7nܣ2}+fEviο:-j{.p6]L+ڡcAc zt-Uj`K{ܹcCwt_NcNZK8 uO1fA Msv {4XO?e٦C`s 98Oth8'e"OQTN9AfNkvPz[O'U8͢WpK n^{gZ4؟8/cLXS'=;\{eO+ʅ4/K֜W\\h}y.""&NzE"?Fރ'vFGP#^ոpf7|繪M:.JD4i~~<ͱ7sI'?9oWm|cUy0Jj&2i&\/K<4lPtF5-Fehаv}ˀ Iwa GW/ЖK#m\Α:hCcD,^k>Udyۿ^zu{}4i3ITv MfRuw 狏y*GԎM #;q´0ʺejsDQEDZS^5aڳ\:n̞3x%.<"Mf^Jdv3Ѱ(4ᘢ'urQa֮EAWyEcEoZ]-r+ y!A)C5@9jot6dQtG/IL`yvM6%1#ULYf懑]ssrNcE%ZzpA,K"en`֊F.3 FzW&e/-^$>JG]B?I8~G\Z=R32p+ +z]MTvCt-hAoג%3$R㟍0ƕO3?I۴nu{BYsP1$Ff\ln9c֓tZRQ/ ?BmVNR(oxQ@xXQ uCޯݣ(9s.e2sFO5ULhhO4S$VHDARv/lniZ/ 2M6y!,i!ikp'L]s3yRKbvژC~YsӥHDo&6`\e[WP'z-o_E7T*׀db~K w#)5q~4֍jjT}!VB=|p ͖̽]M;ۨ=};B\ʲmp^*of_E3ƃ⟦X?K!(>[jѰvjɄZRj b%~t#*0׼t*fIO4h읝/l- CB4ӑP(g^Ƕ44l|G%yEC$2ߙ q5.`l{H )|iuG@F2|F4T]R) -mCnc |*s*ъOC//ۜ(]$2;;anwT*j8 v-D+f]lq:h5*-ԡNפ=Ľ(S~Jq7>\3.faY\"Zό]/hߺ[qbЄ)vV迟/0' vE(\^c,Nv~lUG,X7|$(.A@DlbSXMVYDҶA\ Yٻ&֞o)! C VRgc^-2RRl曄6b{gqit@,)AѬj F !;ݨE҆ḵ-Us K@"Ex ] sMe"mWW`1VťQ $('dMeBwӟGHm;l*>ߢ=,ayeXd2rfS'~'^xssڎN>?6?<`3gsZ~OkóWō!˧f>! }ԟݶcW<+qgrqY/W[t dM4?JbژUDJ|L8y#eSHk{ eEjCvu@1fg'VMrG[]M*۵N5f|Hɫ-Մ[%c^|bPGkjIԏ=%X+h<,qcѽT:^\2HY ?⹍|=QNFu߭~u]+珙(+ \ڴE=Rg06OԑoIr"Arq |ا+Nڧ7olV On}޻f^g|{ϧT]-Zz 3\#Ԉv5 g(@sm=&cnء5tFbn٬#w,1G 揀~j endstream endobj 38 0 obj << /Length1 1630 /Length2 9562 /Length3 0 /Length 10611 /Filter /FlateDecode >> stream xڍT6ҝR# %ݝJ33Cww))ݠݭtt4{yoZ3ϵ{34Y-`f ԙ (T\l@ '-VAa 2u~I:?)à;esH-lF+ p[Y;?# 9MeSgk)@f9{W!kgg{vv7776SJvh@ *_N5an l:=y@-@My%=_,g`wC8 P0 ` Te؜ݝYP߆vN'SWSٓM2ӧnl"0OSZH  w}R`G=:Y[( 7C-,7abϮ ;6yGfr@^~r7f^G[ԁ=`l zAr2u]@>^T7BX͝f +0?џ ˿;oOdD/ ?Η]NNYU\X<@?7QLW<ا)`׿ϟ`w,iAp-h?.-H6<6x⬋aO[_S]_; @W+lP+$vYͭ"_rKf`N߷ f>NO=-,~o'kH<</U0 s~r<9>Q> ]8[rح<v?SD@OyaOy9>%r| `w|)_#6wqt|d,h ;m~f.lSrU!Nƺ5"E4 r+dG)b0QDڿXsn_ {ʴH֠Dm˺w 7A`_q̟\{_M(1mˍ_8&1dkt[IF8vJcM~]Aۆ޹,be򫥙 $P*zFFKyvTc5qDWZzkɗ>VrxDq^^W"W!I]tӻ;L-+anIⅲftv\?S݅Ɩµ eXVvtۢNX)QLپiY"e!b,` &ЌNaζ2Y8J:g2MUc,2Cu" &&mkG5/-} Қ1g>y4Vb*9uOĀF NXEmQO@gv-WV?(Z`NQ "= )ᗦNLy-:*W9'pP aTn97,fy'Jg鹴QXTdq2 >(& 6[,*@N&K2n9,nWhctȴW^qr{5(Gįmye~5F$xLBKC+4+q_F01مL\%Xf_qQfG|G|u[@fs)Qm߃o&|4#5=),tݲ8 $+Ox+YQI 5!lrB$($|R fs~SDt͇!_{6Wqfw1=@ud 77WHхiClG[REKbz\濚kPldfr <[Y?g[!h_97N}w7QX:l(YKB|$bK37~yyLC~EKވtʤrgMhEx>=xk{rr Ј@KlsZK^6/#o`ju{LnP> Sڠ)NYʅg !+:$R^ÈCl̚4ƛGMrKF| c)q# :49=gz [-6 5gjNhG*%~(uDhi+VL[I(fP!?L-Jf+,"f%茆7MToYܿuuIqԐ20ฤ]iEkM~YM% Tԍk\+gM{Saop@ù~ЇE+A4_s* )xҳ6ϗehTœpQl7*^5xJ4jC97.]ٯ%GLwUOHc3ؒ J-,ăq #\MU;X(__HJ)#NGH[½hp$̫6$AyiY".>)k)xYOwOJZ]+&y\ڮw߰^ij2TtIRל[a>2p$sGbbajxDl[q,_ODjX(D̛]֍ڣYLӈQ`UT-%vudڮe5]{f η)n,}}`%EOxgW**sXR/qV}#ZO]ViuYT|3dvɸK}s}}bYIz|0¸Zi|9_gr;Lk}3-Ў^E5uƳHiU"ȸ0G НΏ^bq=.,] ӝbw>3@u"mR|${Eڔn?_%%#|.C:GNFx;*٬Qt?xkQA ``\襬6EyKW)jkRud\V%UإF,P/Z!ݥjoz= '5:z`%Qܿ)}G6Md'BeBcE\*ϔୠ3jK*+v$Uň%C *ʝ;evr Ek|듵f̷F].:!Q^PexbsFf8svԋ34_R nK8Y'ʔ|o.fPx'F3%|(A 1@qmQS8 Y8,{n< ec+JOk_S_ :W g3"ML%s0+oZCbg~fsv +Z0}E4z5RP2)GF *wM _3X2Jj)\4:U䢄0RC!l7DY>MCҝr>~ɔHg#Zn_ce}VzC%j!\^}]xIdpVķDrm/Y˻# vߨCjثMo9MALG9MԜOx%N C+9o\!U7gb%)t-I\bB฽:ԍ($Mnv!UԽV8 قInFV^]j+Bo D@1sm%SoW/՚;\Ng<;oNi7ѭ\SiAy8vSar4v}R}YFI_KQ+%fEh7+!"}~cml=J+9qmτNRIm$xt)/Uvc`,m;_af ]2=/HnEJ\܃Ax BhVa] K KUwȪL t y3Kmz8|.THð؁hdE;e .S.s3 /(CЎBFKC  K^B9)GF< @T%?v aA:eB6sD C/S]۷.n¶]μVaƉx"{wwefH7g4 '%*yּMӝQSln\<ٰ?gfWd<3fd2)!^!0|ӖǡܹY4Źr›ZI0ԟ{ulGsm;f!nb %E[zQm6 ת8Ba~() 싪 $lFEe0dfr{N{ KNm℗\q gP\c"rͼrb A9) iNNYDz<J3oPd r:lPM ²MU66B5NC=i@Ɯk#M^GS|9w&a.kѳ_&ۅ RrF^!@wxeY`0s}oөīŽ#u8JkdKY "قm+Z jj怄@Hen S' [IʳouN LۉuoR&˯ CΦ*u5})$ \[#%To^Gk1<6GX6X/w}LF-e{5 fnq%̆,0rwusZl6}d=1$)& & l LVõ[ع0p@:A?+%u1sR9L֝׌pTrO[qyՐZvۍ-Xb]Eāw \S޴"ECUAy3[OnOWDcb=-G(cG_(NR#ƬUjANbYŏ5g$tn{c I6S~]ص_*k+g8xA8C/-& l.E@L4orL0bw1lU9+Ge_eϋk7`,bj6T`י9(|桎`$\!j#0Uǚ{}9Znyyء=O~:}3%OR$I)N78Jx쫊;5#(9XQ]En.⒳|M[ͻ}|c]p4b(He9KTL_LJhAS1UǷ?/Szcg<0,ɽCO vۭ)l̛xkhʯS @~g::]bLȟ,zSIq g0gz3ꓶ: r7H|z}$T &ܟwltxGw \?!u1OMx+]t&wZO2!7GGzWH$b׍MDY^pmC 0ZU[19(ad2b2Abe}S `BHj_~|uU2]{3Ay*V-M(C|| PWĞ&6!_!ْN Y}XؖUt`.cei@0.J;ǻ^p! fP;`wE6[>W R`9ZFx69JHjЗ& Eڎ5BU'`;3pm95 Y Vxib¾Oǹ:Ʊg9=H{wƍۆ(BSb)5j6wv?̉LS ڨJUc??ʅ_z[ݮ뇴jp1t_q?~왛5{4%o乵}),7C9;C;][g5۸ٵzEI'nK<M؛1qz>cc&9:<7Wn4眍}AFZChzU{}Ջ*-6.aF9(5mmB|ZN{4m/bY{oLHd%nQf"ߐ"ޑa،ˇ}/˪쉺U1?PPe?|ù&D N]*i OD6;$UKRBe^m"WOt'ƋP>Ke(L}-RH/b}ҙ4W?L_iL< !~n]xD;#e ޲hU{A^˸$杺fౠ-'ױ)DE}[ۆh2|R˲cOq-73mviz5k.՚у (o\x\.yئ{ ]U[imNܜY67 &aIM0Z$ڔڡjBžaЧÔ6ޮ:T5Kmܠ)z9Z}RYiڴQzA kE&|KHadQ=# #C}[F 2kgV't/HN.N |0DD7z +O~C鰾z}7j9u_D 'Je6Ocq[to_ %LdQ.A C uO;씝f/G8B#^yKqnn<-µA 4)(-ԣ^TIMmC3څ%^ƨC)ٯv}T&cQH9xʓNOYs o /~8/Z0~k9!دx6ln{HT#eT#`^l9dppD:$]{gZ[z op;#> stream xڍtTS[׭("]JRz &!$$ AD( (# H RIxod֚=ag5CTPH,/$T20A A>Hn:؍a ){S`Z($PE$ PH Ov@->: s+߯@N(,..;( $P u9]uB((W N),-qrCaexnP< Ԇ8`: \ X7r8"0U+^uiu0_`Ϳ{( @@3u r_F5`>8yb10v(h G TrWA-@WA~yb ** (wW@^>. AO51=?.ki rsK0zw俪?y::s?'q]W"B]IP_Ղ!\;\AiD av,/74GrAy Ů}|p"J=n~L@X` ^+=χDaRW( ם * u`zm2 &FQPJu]9Pʬ-H1堉`YƮ'Nw<]>w˦wkmK ]'M0F;}h\NŽѳ@fՌz5sM0됔9.6\>~z3̒4cjf~=qPF{d{f]{[^FKH;I=Q+]q(*uYx|/y:͠& IKKMq{ޒTk)]Ķ<ᴰ# m17o},Wu16 c'T{p%!bkˇ؜ym]fƈ֎.C]h,C@BczAE!U!(z g2žeCk8Mhr-k.\IvxG|=O'A3d Fy"ݫy *r\ލPwG/mbIːS!%fF6׶<s`e'(9c)Gk8tҲR~6$.Ӗ6'"-p#ƶ]6j}oE96I`Σ7HZB83%+Gr kΗ_Uc_3lyEaOВ @vy ck/tD~1<(y}X%#Vv?szҬT6^SwY&sxᕓI4)! R{̚ܣCkk.-xboD TH'hw ~A}Z M>`L25QYq1(Mq7mRoxrxs;=C3X5+p*kffgGGLw9.٨{~lW+X+t̄@$4'B =4DNsX pkk;Ӓ]I;ܫtY6aLiy8/~~~Kr{ZD]Xx*Z#!3fHrMb!_]vL׀KW-5-DNUfF^@i"UFJz0#V'AfK&||x#=/~R S?^.fJW~|؁I̽sq"{vԍ4tX's8aC86&rK*]H`&>Aoy3^eA$!2hK}y== -.5`&!yc8R>u'y__,[$$RD;_sCfe2nXzm6A'Azx˖%%I$O?jz4oí? izX6ỹcy/x?u `WH`IcmyN [i#Z{jLX"sJ@ i~YhIƥ1ՒOE7< Le!V@]ɾ蘺_KC%ߋ-sҢ<"dV5̂Ɯ\ >b<=3K*| Clsm?/N a1E85S5y5!MP:XѳzkЌsE:Y\Gy؛bݖzZ\7զ{xu ᣇe79^w3]HRnq6ݎ>b[O૎*nPps7T3I|(iil&qO5:[2,i`(UX{G&E)?KGi$ 1{ fj7k=yWLbtxLnކT/>WrJ}o[I}zXGy`U6-(3"[=yYr/@>膳y]oීȹK"yQÒqݽ}[aU7o#duTT~ҧ,'GEnPm;WtpH>|csmе~x OK#*ód[sKٞjm{POwPϭIkJo?I+.MYy6#H^!]CՐuBd\wp|4q姇Uԡ΀rӽS7ܩ`c29[0&`TF@>@nXip\TydgBuu_2$G=3ᲀ z>ҨX(02mڑz<`2hRb+Qgu3B; GR*&M"~HcU7B}N'DGeGE9܁+{P*h:^ Jzv/Qfa5_l|)t_9}(W:Rſy&zkťyS3e֌WhB)H{ުeS9vMY҉Z[ÉJ0iu]tTe=UUD#/j>Y%xG-lZz*89Pܻ'eҼ )tr` (aI*}VeĖ6 vOA{C̷,!H⍲]SV5-Y謔򷋏2yюK:=rBΟy:[.டD3CxF+sH<uDLEzڎ_gyҟ(oRxQN 96vpF~$^,{^8vcY"d=uaգ'yW},ɢwjB3bz\qm}W[w&7\D(N^*Ry3t}ͿX~n]@Mӗ M$Yp-c+$%'qNw*vLgb1Fb0KmiSzj%A.yO傇vl8%SZdc%pbyMkyW~d"LhzO!al|J}u6VD\L>Jʆ'DVqB #"i7jn~hl 'tQÅxTvpX79COm8N;RDm"B"O5і1OD^ЧI;sM>Q }OVSn1ox(O{˪_VRz@X<CDz(E2i%+2; bߊE=I ikV"H|y+"R+pY:MhvJI67b[LʃkTazo5QQ N eأ߾)dI. 03Q xǗDE>5R]'mNHt]: 1S[܂T-ʍV^B7QL㻖m3 ,ߦOz~H,i>WK]9{3>x,;hJafC5;Ɛ#'s8^v*p8Rۃ_ld?dBG A 8lqf&Pmo?Wu9; 9zO؜,W$Vj.z9T. Ul *ȅ\hGcO u/چwſ>!EBJ\J嘳a@>[Z_]|^ySsKqW|<9Pw>] gœ[MFGDj}/} (#4yi#F[ri\AU~oʃkpeyj'#[%<ݯ!fg> bsVuKfx9v_W`Y;޴ `+|n;|{m癮hݗ<Ϙsk? 7.vGrbyWJ79keۦ"ntvuYko!Fs(e-_S!zv%{t疍dU;n-Z(+{*K q,oxb0zG)MBYӠΓ0Cɍ~iTѢm:M+ȡgDs- ߃bRN{|!\Lqeo{ }N6439SW-8i?\B)or͢9cオy璃Vm%)|e\IN;ܶ㍇cFotZ[4PAݵb򽎋\KG!an cw*2L힢nJUHh~ 3>Z endstream endobj 42 0 obj << /Length1 2439 /Length2 16977 /Length3 0 /Length 18403 /Filter /FlateDecode >> stream xڌP c !]hqwwMp\{pKpw}ܹ+^>{SMNI^$agLWScf01201 RPA#Rhv<X:ί21󫡼-@ `aab01qΑ t2v 'D Q;{Gk~PP9vۀ&@[<dh P3='=#Ɖ\v@ S_ @?)j`(T̜݀ luzuq59^T ǀOs @`ۿ&&v6@[9 l (J18;^@5ҁ ae?LN N`82ⶦv66 [g'Ŀ;L^ZڹzmMabϨn vpIc*B-39ؙXY Ăj_9x^i|f^N@W"33l 0mG_ez?f_~0S;[k16?Uع,vVN7(T񇧴?žv3T5c)ؽ-@{ؙL^??.f(c+p[O=lܺ8&_SMWd vZig.ۚ[Hdv6{6#VkϬ %;'_7 ^pzɿUߌ&v- ;@dz$vv6b# #`0 %0FF߈ (qELF߈(~#6o]7z.f^+FE\ٕ~|*+Q7zͮf^cj_udM\l%oRE,@?%;M@ 3?2Fc ?ܬ+k1&v֯olIll~kM)AWf5ֳs ӫ54-<-@X9a~z2j~=F1#6izVv?k'+l4N `tvC_x;9_1uAd=]k$O*[A?W>2A\3 n*LF=?EJMr D]x-4؍)Nu%Hu\֒h2݊8??(\G@D&` !C…u+^Ws$tv[yC}z^@4E|2XgzbxSw)R8ZD/5w3j,N:W#De漊 W粘 VGH;ѕcv\!ie AFV5?Dç5ssSl*5C w5wᵳs}x%{f\sHlqgQdY)[r;2ګr~}]L.?) +?b0MAZw`QB'3ۯT.O77zBivcFRK_:8Pit b7?o8kjL$TŽbf\ө K%-#Ͷ7ssoaoe[I+88t^%-h&[R1˧:tCw'aA{.y!蚰t!2>`oq(|[^Z2>,K/}oqiѮL8ZBap*7b*̫X{|HEfe4M%^$yO6oOʈ?ץt.Τ JJp q /?(*F2~\jUgfaXik]Ϲ}aj3VDp=7RXid)6=v{ޓMyPLA˴&gb(rDHrRrHFO:Y`Y oɓh3<ػ2,|[iӔLы6?Z†cK.CK 5d#ĄC[ fDgyB(yx=_j$ T?'}>.t 3| ElvҙX!T'`h:a-" J=Pe=\ďF ^3rq~XfdGS Ǒ-ܒ탳‡b3#>" WɧaTgUP%$F-XɍDLTLK/,clXÅʤ؊PTHEIԈz@8/rfmܐЗ \0|#V5Tl%Wo~:(ӿQQ(-1b~)kV!w"*M:0g>RdzФ\m? Zg~ced.g k%F'F$uTIS6iHhS(kJ t<{^,*Bzk3Z1?$p,R{hf?XKF*G**.7幫a eHAis RQ! m8ȣNܟ֚`_MmfaۍIV*dՐ>'szv3 zn@YLbJSܼs]1Y^xxF W0lxT;]uCqݐ$|SBcdz/eV5;]5@1W:N8QճP.M~'c|Ca#2ءBQ%x p$6` g{cgbB`X,zXdp͖z}sd!C;Lv:W9U*J )So$!ٳ?{qj/™F)cv (AF;?*4H0p Dqs$4L9-U 8XI>qs+:7zǜ;2A_4??s&i9}ZeT0`H=")ؘ'EM<8b"m^~,+&~|^Ur+=7X{VCKAGijv>l:/_Zu^Kg%+]ظ֌2o$R 4&+:cpq74dp}ID+677.FU~He:RɾhO\e#vmDstnJ3\‹rI ,-"'F$d*JqJwg$Ybaw3HNP05!{6`N vgq,^ľZH5 B āL ~7!1Pu(.ݕorP x<$QW/#ZW0aҭ tiqT)<bIY&CT ECG_;ݢAc]ech}UJTڒ Iwa#@ۣQ$Nߋ߬ qCsDqGy_zlꔇ! iК^.;7ݔDOaBFn;yRv l5Ji ]VeH+q qm&ƽn᳜_~(=%:ɸ&q L\g2rY~ '$/of ٭7h CZ5~#QaU^TeGJ:8Mۙ (䋮5R3m N'X2r̸XuXd}E#]D'ߚL'ګ̭}cNRصdTM gp7ZF4]v̷rf}(٬&: W9 2b `<=MGQsGH19eV'gn:oT?lRf%"h>4/2R2Gdʭ-$ܔxRJ>EW@X|Ę'A<6t4{kPLH! 턍q*| vЌ ƝT-Ue8QАzmXr+HIgYFKے#1颫2W|(|V|:99GIpXO_sޑJ&S&LtIb奼*xSgP7Nurw.ى B7 "Zy9:j( 5M,Z5gLso0JE\[9 =4n]_8ݷR8%+0ߍvB~,ڡQbxRȧk5/=&5Q7L)/NQW L6!;o)]UصP- ^13F~%0ԋSO8. (UrR _::No늶wvZ^ޥmnσU9Pq"wNA]X!ESa`a}prՀcePf8مϧZΉ̹,³^ŷ'&As_sʊ=hKRTTMiHw~2Ʋ +rkw1+'bZg-0f[S>u1CUCՐ+}x1oD(N ; s2sI޺Z*w}Xٍi5{ۦDrlAFNUx-]L Z7o-ķYء@ Ij"x[8>;l4%hЅ\t@w9: BAvP-pĺ6۵&ܼ*PzQ> 6Efn`Y/N[jw /dY31PZPz{wN>,^noe>Y]U0Y2㌽I|Y:9 }BiM+r>0SBOCu@qGKC_I%yHf R/YHN\Ez%2ra d~%Q1XUpI\ Nj}$O+E1Odk9@Q ~/ XtƠZc̴9h>A՚ܻ|s`ڎ1h)u^yt _8cdT}Tb|"XAYt?!AYE %-*zQx1)cLUrô;Bfr+ϸ3a31e?Iy_ 3`B*I[\$ik*3fXᚾk"hh.oŮckET],'od|h8]6zNyꦹՠ>8V tj0\mwlc ]|<@Ũ++Z t؜ѺB ljPĽP>6*51Z#(v8B~;p>CNnqGb,A N6CzvR=خmƜ5k&;N i} ^/O}@TNe(QDbJ.74p 8Zqh!`覩ғ`;.6GZ~PI AqOqNw4hbڼ O~r]T~*zCIJ0ЌQ!UT4W}ȕwVXc: ??}/?pGns~l ݑ5Z<brXS]&U^[e||G7KM#a|Hg_!GaW2҆R='Jgs5ݳ?,o2Aga\*7jgD*f#2?vnW%pFO.we5IV~bhL]݂$pTGzkɱI)ɾj!pSCJB&׍MYcIZ!?flќGn&T 24}MR7,W/·NҘi/Th)9{<>t{YI]ikJ˅G)]@gI-I }-dU#Lq3eجpO)i3ӟv8Wm>Di(Olq&PWԚ_7]č51w3h6Ѿ]pU]YHF.}WOYӁKȧ8*9!8]6Hsj6HOkĞ$\t[|X5ާ=`W\Ai]!/Siou/ w(nFiGBg-&D#tAe9遪IFp 1q][kE裆6~-[ %@EbKrn!}[߯3͜!:99E"^Ĉ@XIٰkͽ(I|8P(>KC"ͅM%uGVIF`EΟ~f]vE&g.(93$* x*WijČbR13ǻʕ!7,gtu]1,7"3[ ;7$R {g>[L {+ J[&CkJ3Mx!zU1=ABۏOh2{jk9B!cTN!D+!˴.q,bZl d91B(:XZHE+>GZ C5i|}3ҷ"yW3k4}8Bw~%rC? ܫG}!b\ RP#fwLAft߳g%iFB|V8~~6{Zr*KE 4[L_Ee~wp`ݯs1@m¿ZK. 1y8~XR>boE Cϻ>^^v}tBT_eZgvs h|ҷIsGͼe}s9~L00%\Bol)rw3/@mT_c}ff3Aϋ uksB`ic> HMNR`8Ϯ/aeq=ڽ .Nü;)T AQVJx `oQ-2ȸn^B*>&IXl]*>'>JO?^p*x@}ǃqۺ8)YZ ۪X#@0hZ@B^jy-ZNΒj%ם$ I17i`}ta;pd㘢YO<16P a .[ۯ"4 K7dL#Y´ۆ_T"HaL[Q57,d3NhO׾x)@\T#ayzAL*(DE&9ԋK89iCס6+~'τfNt7A +,ODVޟTLhnj||2ՄiOo^J][Q&jD: }$ׅf-%'iLV[d;TeF=YZz:01pv*F% oѰAeܒnLF4Xj6ҥ`&^,#J.^CL^p#7?x^5$J"A¡e_<=*'惝O 43dd5avc禪e] ~ͷTvkS@AM} 9 ̅"ZLX~&`Q$"UBgtzJ+mXKǂϲSIIcerk^\ڵsۋz٤T4zn~_eNR> a@3y&qF٘n*W{j# G 蠌|9Ҥn 3|oZ[;Zϗ? fCzXϝx!Gt[xțL>?(~v V "Vĕ"1sG 4$SxXF\76)R:U3쎂`^pY#Ev0ɼ׊m}n6cW@o%|R(ËġϮ\؝^(Z,{(whgw32MD%!4O{726.KK|7[rc(!˂ʏ@=eRZDF|F4&)2s^BߩSE=U(z֊&OkQn_ =:UPM1Dߗۭ#1 ah"V]э#c0XIڙ dP)e|*e ?CZ ]n$\:PmE a=u")Syl+F3n:$"eVhg}|4x1V;zssS0Ll2Ѯ^,.)5~s& %q\r35<3O`RxkT 'vG)4挆!.Z7HZgn.G(3RU$c1Oؙ9 ቏Rb \ӏ4&^:GUNmB*nFf.5aa}aƝl;ҺD?4 -^nbz"x++x80r:4}gg=ZV)[ާo5Yu:V\ Yv&>ԚcUt" '0+(k,t[W0bm~06hčjH-['SX-3V9'dT6=Kk,G? B6i`Ͻ-茤p)Dgۈт$r&[aPDhzG2 ]-Sjb3nw?7"I!g+дzz}eà>5kne:S=}H侷}9 |'ܳ$ O3٭FƼ/&+ͻE⽕T/pdȜe9feuFx!SxY5u+>.$FBj.#x1LaҞE¸-8nb׶%"dĺ2U.yhҠ]&ZĐCksjy3_p~Dh01 U[mkmh]̪T,SIVӗsN V#vr&TEx߱!Ama"u}ߊSf|x?:˄>>BNBU8qFAJ\> Hi7^. A_)X0CQ 5^˙T\n'P 5~>AX 0týP#7X0&y}dMv%hMڱ?gW+ *Qؾ= [.Y xƆY8B"ɢ#`Ӱh1-Oo?A9K;Nr0*VPuUJQz=sS]1h0 P./ (E)uW?9r3'~j8Tyq1=oI1x*ԃf# A,NFe$ ]QB4( ɾ 1gp"%09Jvor1gD_{%u:ZͦL8_-xJ| Bcl5GvC`uj-Rwd\L7[^3A]8gSsh0A"8u K-k4ˈ)Л2s<8~G}fk+O{mh db+bdPֆ8b$oH[f|]7tiDq$#>fLVbr:|V#T98D+]jW4'{Kk?@0(Zr\V1cK@C'=pҡ̯obԚ<_uh;0 "1zώu:ҥdKZQ p0!iLG=w+ 4ؤ bƃ^maboo2D;N~s7?3Na\Ŧyf N$eo04)gLBv]u<`'Ro‚D}fLDD??O| (Ҽ eD#o/i>&g̞_-ȓql.n5)x`VT/1цB,ԃ@eH;41T4u'Wj09׷WP-y0kk&Mnc˅>BR6wDAoY{~8Hx_EɐwLtT?2a*!M2@s6FfxqcMaH;"a׊HFJ׋1e{8v61׋4MV#ߚzN~ ̵=zv@o;k./dTzA S742\ηڤͭ 8ۅJdYip!uI FAs[[++sF|A+Qd)g0Z}^G-K4/0 )q#G-3"WIfG[!-){ s7/mMBV'5iR5ZPccxx(m>+ԪX˒lKm@d"XPgʌ uߢ[}(ή^-|-8ܑ;1Ϗ]cv2u7C҆0gt kZNвBbihV!DZxYZK=&ly *1疛zXZ(m4\g$%+ )I~H?ۇL=qIDQ DFM ;~Űj2KqO:0g}pN6|awoB>P2BHM^D!]T;E3}XZN(e82L}NЌ37\b~ /e&elʷ#=wss5:I(n{lgreѹ<7;kHbtNuNs>&ZP^Ε~%ӰlǍ+u'cDN? 9 :p#;;'}';Lav7)0?rap18sn|?STBՋCBRzNH cȐa/L#g!g_k665ೡqb<\\|qzoh1);@氶2~@2~ٍgwʷU-ebi l{4qS̽` Lygw\< đ84yKz{p4Q/nN1#u y N? MBҳ騁>{)`kXR[b 0&o9鐄NS;0^cۙw~El$f c2(C5v獤Wz;F|7t~F{Aϯ: C+t @ɓsa)Am$m-/E/DҟRگPZDK@uDA30wYƠ޺C9Y$%uG z]'Lqaq#fKLr 77X8&Q:.ICYa9Hނs)ėp<-=IUX\L.e )5,o黉QY7rh69б.Fٝ1 (Ѵƹm]XO45!ȗ~JC*-N]%pRJR v%VMV4֥CٶL%@X6_3Ŭ|pFf\M1SI;Ge3 $6U ""`1߹]:@-y%iHA~ٯsS߮פ;SmZQ,?4R5&͐ޏ:Yx̋ MIJVR BQV/fD3%d;( ^1G~b_/L0uW+Rdzq2Vm'er0i‰oQW;k*@V4lvY OS( aWZ[0hzԛ' ( EH־`{,H)&o`-GbdX=4eC6l7V-f r{`k\;=1K8L}`Tf1+۩tS7bq$D7%=Z^R6k已HZڨv="-xbuioU 5竤BDgFk~!N {p.R&/Z:Hez`8ϺU'vbuV.$׳%Mw:氝Z%ҩ\ق$'sᲱXҩWPiXтFxk/RP(@4RGQ|fW-w ,KV\f މP.͞P#8y.eY*>WY9mʬv$4 k$pD Ϛ#GmJ Ӏ:d" C47 \D,YKI) œ]flM1o{g9&/);q{ .Pc˂* <2ճbmݻ:|BO|yu&hngkmcd(Go4zLy3om pdteN=ML9d '75J;y58eJцx8Tn J1q|DA.}Zgve2>$ῲڭfq~B=Ϳ)) \n0}~`Y!SFTV4iteKfݚO~~+7\+ұgJ/1]|;S6Vј.Ujt,A]Fm- V9 找*7P:m12m$D+ZA!H$l"bW9?~UumjzURTK^HvЩSd}I7^ fܶOAnX h&9~Μr%:|5+QU# [^݇FpwRRt6m }kcLMWLYPz lLft `lm'Q?uQ:ֻ c^QijGFV7Qkd" Ok b nLeD\'nΰO^ȣ L&3"+s޶?n(a2A&$Sn|fEOӎ*VM,q;M _!OZ7pR7YTsΙ 7fYݭhv[fzrUC QR$DТLP=KɨGĿs<҆xnLR4*:Y5 KgM {NU 'ǻyg=lkց"W(e%V#[^,~P?r1[DM}vE1WuQɲH }.a1,6V12PzREwve6gV5M{r\7{%X/yT0s~[>w 44Q'ᦲ^o^$Z`Tq.?+i?4Գ x_ЋNHs;Qjձ56ovӪQEApF\%sx*" ϶m@y&, {;n$B(]Wc92=g833i_q&Yfㆇ4.. >YZnUsCO}^fU[6nHt24Es \L) ^\J" IrSrGY)ˤ}"crPSHع'+ Sx 7ݳ @h;v[.wJL:"@,{BF|SssQ'&)wm_k\o%V*%gWĭ|Dp-%çK%IW<{W3i!ifOK,_]0+, D]F昨@w$; ;X: endstream endobj 44 0 obj << /Length1 1429 /Length2 1958 /Length3 0 /Length 2876 /Filter /FlateDecode >> stream xڍT 8UkNNEu}dg0)S̑v>gčFWD M r!iPR2e*CBu}~Z][SUrб2`[!'=]SUD8tfjŠCg q03\: 1q1H e  u8U+B8:_~EiO P 8C8[с%4L81 u, w B`* 6AAh8Ug 'bs .JY[x;&΀f`vsQlN'C # |4['] *Bt6ˇB a!`k lu]4#^Tfj Q'al DhTh *B`.lo=\o> z==G 3 Qfc2#4P 8,.}G G$*B?Aqߪcn6ccBx`+D׿1Q( >}}kLf@IBlo(fvK! И&&[h|67٧SEU'ȖKO5f:4 g SnQ{iHm`+¡Lsc%AaW,@H E n6FimP *@,0& @S#MuQKt`DGJȈg|& ߙfr=Q,&i` o I+CuH]6!ݢL'$XV?Ih_:@:Oi B{WQOH[;yԭk[?Ȏ]{7*-=m@#*>ܫ^ў)1:ՇL$^HK'9zy?.mY@/YH91,V]`AvkScj^Ҝ *m]'-M :d,q1>&sþUL}W~RzrkBT^J#ݬ"%rI~zJLNOlӧq;b*bUQ#ŧS*f'ح$ړ~*bnrڿ>X[.ߖ_wS^9V $9xvUs w?~3E\ Yv׸wwY  xK߻~6?vQ^Y ?> endobj 11 0 obj << /Type /ObjStm /N 33 /First 241 /Length 1497 /Filter /FlateDecode >> stream xXS7_v:N >Jh)2<\́ޝ_J68C,Vvw'Y 4Y`B1 &S0a6 (D`VBL1=9ua^iIDŽIτ9ˠ=S2Le4|:/:*@`JZ4V X "cZj`i 6 Na[%!@2YIA~Ut.n)Dj<-ED~q1̷Ʒ,B %~{2jޅ6.bǐ4^&ps.b8'DzI$tr5;;5;}0Kԓ =o~7\5T7Xr3WB(u7)e J;wg?寧tgL2idWq& U$#ڮc62ҪW]AI {de)gLtWC{1rD`t! EG MjѝTGOpc "#h;νJkgq$wMtq86PmMz$]S8DP6Nd]+WLpBk/-RajT01?ۄA^RCy|į%xWCR 0O_*:c1ohCMODAYK:OwOz| tf%+9Gy3?-!S/Eڮzoof/v+@tNomV6 :k> zɯ?"wOf;n_:^oXq%H_ ٕ`@tߎ' GG++8pr6!pr:[|35}@q~ͫp5(GKŨUH~,{6GY7l\;ɠA $҈g'uF*iOFӊ ݘȻEQVqY>S/KzW7MPy!nRK>oSbBV^}Ǎ;,3z&BZN%knmUt+MMzG˴[2x׷ݺ[C۶VVb}+uѸKe8M۷`r-7}z[ƗMP>G~Q')lË녾GƿL2m}q :I]CO endstream endobj 48 0 obj << /Type /XRef /Index [0 49] /Size 49 /W [1 3 1] /Root 46 0 R /Info 47 0 R /ID [<79032114F80F245BD201659E15794962> <79032114F80F245BD201659E15794962>] /Length 140 /Filter /FlateDecode >> stream x=p|" "kS8EfP;noy b#Vb!fb-d_+bq[*"yvEΞ=A D.%?W5"NkL\ q߹j}uMjĖ, /' endstream endobj startxref 71669 %%EOF proxy/inst/NEWS.Rd0000644000176200001440000001001014057153351013447 0ustar liggesusers\name{NEWS} \title{News for Package \pkg{proxy}} \section{Changes in version 0.4-26}{ \itemize{ \item Change coercion function for cosine similarity to distance to 1-x instead of 1-abs(x) } } \section{Changes in version 0.4-25}{ \itemize{ \item fix scaling for gower metric in case of cross-proximities to scale x and y together.y } } \section{Changes in version 0.4-24}{ \itemize{ \item fix tests in apply.R to comply to new default behavior of encoding character strings in data frames } } \section{Changes in version 0.4-23}{ \itemize{ \item smaller changes in C code to prevent false positives from automated CRAN code checking } } \section{Changes in version 0.4-21.1}{ \itemize{ \item Change registry class name to \code{proxy_registry} to prevent name clashes with registry package. } } \section{Changes in version 0.4-21}{ \itemize{ \item some more C-code \code{PROTECT}ing. } } \section{Changes in version 0.4-20}{ \itemize{ \item print method for \code{simil} objects added that prints \code{NA} instead of 0 for diagonal elements, if requested. } } \section{Changes in version 0.4-19}{ \itemize{ \item fix for binary distance (C-code): now has same behavior as the corresponding distance method in the \pkg{stats} package in case of missing values. } } \section{Changes in version 0.4-17}{ \itemize{ \item \code{pr_simil2dist} now computes \eqn{1 - abs(x)} instead of \eqn{1 - x}. } } \section{Changes in version 0.4-15}{ \itemize{ \item add imports from recommended packages. } } \section{Changes in version 0.4-14}{ \itemize{ \item export as.matrix() generic. } } \section{Changes in version 0.4-11}{ \itemize{ \item Make binary similarities work with binary data in case of list data (by coercing list to a matrix). } } \section{Changes in version 0.4-10}{ \itemize{ \item Bug in Fager/McGown measure fixed. } } \section{Changes in version 0.4-9}{ \itemize{ \item Author/Maintainer removed. } } \section{Changes in version 0.4-8}{ \itemize{ \item PACKAGE="proxy" argument added to all .External-calls. "PACKAGE" field added to the registry, defaulting to "proxy", and used by .Call(). } } \section{Changes in version 0.4-7}{ \itemize{ \item the method argument of \code{dist()} now also accepts a registry entry to avoid unnecessary lookups in repeated calls to \code{dist()}. } } \section{Changes in version 0.4-6}{ \itemize{ \item bugfix: \code{names<-.dist} did not accept \code{NULL} as assignment. } } \section{Changes in version 0.4-5}{ \itemize{ \item small vignette with an overview of existing methods added. } } \section{Changes in version 0.4-4}{ \itemize{ \item Podani, Chord, geodesic, Hellinger and Whittaker dissimilarities added. \item Fager/McGown anf Faith similarities added. } } \section{Changes in version 0.4-3}{ \itemize{ \item \code{pr_dist2simil} now computes \eqn{1 / (1 + x)} instead of \eqn{1 / (1 - x)} \item fix labeling bug for list data with named components \item the C-level dist functions now conform with those in package \pkg{stats} with respect to NA handling (pairwise-complete observations). } } \section{Changes in version 0.4-2}{ \itemize{ \item \code{as.matrix.dist} by default now uses 0 for diagonal elements } } \section{Changes in version 0.4}{ \itemize{ \item fix a bug in Kullback-Leibler-distance } } \section{Changes in version 0.3}{ \itemize{ \item fixed Gower-dissimilarity \item fixed dist/simil subsetting \item self-proximities now default to NA } } \section{Changes in version 0.2}{ \itemize{ \item data frames are auto-converted to matrices if all variables are either numeric, logical, or complex (to be compatible to \code{stats::dist}) \item pairwise distances can now be computed \item Registry is now case-insensitive } } \section{Changes in version 0.1}{ \itemize{ \item Initial release } }