fastDummies/0000755000176200001440000000000014451543216012540 5ustar liggesusersfastDummies/NAMESPACE0000644000176200001440000000015713730471551013763 0ustar liggesusers# Generated by roxygen2: do not edit by hand export(dummy_cols) export(dummy_columns) export(dummy_rows) fastDummies/LICENSE0000644000176200001440000000005413465035615013547 0ustar liggesusersYEAR: 2019 COPYRIGHT HOLDER: Jacob Kaplan fastDummies/man/0000755000176200001440000000000014444350047013313 5ustar liggesusersfastDummies/man/dummy_columns.Rd0000644000176200001440000000517314126414510016474 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dummy_cols.R \name{dummy_columns} \alias{dummy_columns} \title{Fast creation of dummy variables} \usage{ dummy_columns( .data, select_columns = NULL, remove_first_dummy = FALSE, remove_most_frequent_dummy = FALSE, ignore_na = FALSE, split = NULL, remove_selected_columns = FALSE, omit_colname_prefix = FALSE ) } \arguments{ \item{.data}{An object with the data set you want to make dummy columns from.} \item{select_columns}{Vector of column names that you want to create dummy variables from. If NULL (default), uses all character and factor columns.} \item{remove_first_dummy}{Removes the first dummy of every variable such that only n-1 dummies remain. This avoids multicollinearity issues in models.} \item{remove_most_frequent_dummy}{Removes the most frequently observed category such that only n-1 dummies remain. If there is a tie for most frequent, will remove the first (by alphabetical order) category that is tied for most frequent.} \item{ignore_na}{If TRUE, ignores any NA values in the column. If FALSE (default), then it will make a dummy column for value_NA and give a 1 in any row which has a NA value.} \item{split}{A string to split a column when multiple categories are in the cell. 
For example, if a variable is Pets and the rows are "cat", "dog", and "turtle", each of these pets would become its own dummy column. If one row is "cat, dog", then a split value of "," this row would have a value of 1 for both the cat and dog dummy columns.} \item{remove_selected_columns}{If TRUE (not default), removes the columns used to generate the dummy columns.} \item{omit_colname_prefix}{If TRUE (not default) and `length(select_columns) == 1`, omit pre-pending the name of `select_columns` to the names of the newly generated dummy columns} } \description{ dummy_columns() quickly creates dummy (binary) columns from character and factor type columns in the inputted data. This function is useful for statistical analysis when you want binary columns rather than character columns. } \examples{ crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) dummy_cols(crime) # Include year column dummy_cols(crime, select_columns = c("city", "year")) # Remove first dummy for each pair of dummy columns made dummy_cols(crime, select_columns = c("city", "year"), remove_first_dummy = TRUE) } \seealso{ \code{\link{dummy_rows}} For creating dummy rows Other dummy functions: \code{\link{dummy_cols}()}, \code{\link{dummy_rows}()} } \concept{dummy functions} fastDummies/man/dummy_cols.Rd0000644000176200001440000000550014126414510015746 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dummy_cols.R \name{dummy_cols} \alias{dummy_cols} \title{Fast creation of dummy variables} \usage{ dummy_cols( .data, select_columns = NULL, remove_first_dummy = FALSE, remove_most_frequent_dummy = FALSE, ignore_na = FALSE, split = NULL, remove_selected_columns = FALSE, omit_colname_prefix = FALSE ) } \arguments{ \item{.data}{An object with the data set you want to make dummy columns from.} \item{select_columns}{Vector of column names that you want to create dummy variables from. 
If NULL (default), uses all character and factor columns.} \item{remove_first_dummy}{Removes the first dummy of every variable such that only n-1 dummies remain. This avoids multicollinearity issues in models.} \item{remove_most_frequent_dummy}{Removes the most frequently observed category such that only n-1 dummies remain. If there is a tie for most frequent, will remove the first (by alphabetical order) category that is tied for most frequent.} \item{ignore_na}{If TRUE, ignores any NA values in the column. If FALSE (default), then it will make a dummy column for value_NA and give a 1 in any row which has a NA value.} \item{split}{A string to split a column when multiple categories are in the cell. For example, if a variable is Pets and the rows are "cat", "dog", and "turtle", each of these pets would become its own dummy column. If one row is "cat, dog", then a split value of "," this row would have a value of 1 for both the cat and dog dummy columns.} \item{remove_selected_columns}{If TRUE (not default), removes the columns used to generate the dummy columns.} \item{omit_colname_prefix}{If TRUE (not default) and `length(select_columns) == 1`, omit pre-pending the name of `select_columns` to the names of the newly generated dummy columns} } \value{ A data.frame (or tibble or data.table, depending on input data type) with same number of rows as inputted data and original columns plus the newly created dummy columns. } \description{ Quickly create dummy (binary) columns from character and factor type columns in the inputted data (and numeric columns if specified.) This function is useful for statistical analysis when you want binary columns rather than character columns. 
} \examples{ crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) dummy_cols(crime) # Include year column dummy_cols(crime, select_columns = c("city", "year")) # Remove first dummy for each pair of dummy columns made dummy_cols(crime, select_columns = c("city", "year"), remove_first_dummy = TRUE) } \seealso{ \code{\link{dummy_rows}} For creating dummy rows Other dummy functions: \code{\link{dummy_columns}()}, \code{\link{dummy_rows}()} } \concept{dummy functions} fastDummies/man/dummy_rows.Rd0000644000176200001440000000354213630520503016003 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/dummy_rows.R \name{dummy_rows} \alias{dummy_rows} \title{Fast creation of dummy rows} \usage{ dummy_rows( .data, select_columns = NULL, dummy_value = NA, dummy_indicator = FALSE ) } \arguments{ \item{.data}{An object with the data set you want to make dummy columns from.} \item{select_columns}{If NULL (default), uses all character, factor, and Date columns to produce categories to make the dummy rows by. If not NULL, you manually enter a string or vector of strings of columns name(s).} \item{dummy_value}{Value of the row for columns that are not selected. Default is a value of NA.} \item{dummy_indicator}{Adds binary column to say if row is dummy or not (i.e. included in original data or not)} } \value{ A data.frame (or tibble or data.table, depending on input data type) with same number of columns as inputted data and original rows plus the newly created dummy rows } \description{ dummy_rows() quickly creates dummy rows to fill in missing rows based on all combinations of available character, factor, and date columns (if not otherwise specified). This is useful for creating balanced panel data. Columns that are not character, factor, or dates are filled in with NA (or whatever value you specify). 
} \examples{ crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) dummy_rows(crime) # Include year column dummy_rows(crime, select_columns = c("city", "year")) # m=Make dummy value 0 dummy_rows(crime, select_columns = c("city", "year"), dummy_value = 0) # Add a dummy indicator dummy_rows(crime, select_columns = c("city", "year"), dummy_indicator = TRUE) } \seealso{ \code{\link{dummy_cols}} For creating dummy columns Other dummy functions: \code{\link{dummy_cols}()}, \code{\link{dummy_columns}()} } \concept{dummy functions} fastDummies/DESCRIPTION0000644000176200001440000000256014451543216014251 0ustar liggesusersPackage: fastDummies Type: Package Title: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables Version: 1.7.3 Authors@R: c( person("Jacob", "Kaplan", email = "jkkaplan6@gmail.com", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-0601-0387")), person("Benjamin", "Schlegel", email = "kontakt@benjaminschlegl.ch", role = "ctb")) Description: Creates dummy columns from columns that have categorical variables (character or factor types). You can also specify which columns to make dummies out of, or which columns to ignore. Also creates dummy rows from character, factor, and Date columns. This package provides a significant speed increase from creating dummy variables through model.matrix(). 
Depends: R (>= 2.10) Imports: data.table, tibble, stringr License: MIT + file LICENSE Encoding: UTF-8 URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/ BugReports: https://github.com/jacobkap/fastDummies/issues RoxygenNote: 7.2.3 Suggests: testthat (>= 2.1.0), knitr, rmarkdown, covr, spelling VignetteBuilder: knitr Language: en-US NeedsCompilation: no Packaged: 2023-07-05 13:40:13 UTC; jkkap Author: Jacob Kaplan [aut, cre] (), Benjamin Schlegel [ctb] Maintainer: Jacob Kaplan Repository: CRAN Date/Publication: 2023-07-06 13:50:06 UTC fastDummies/build/0000755000176200001440000000000014451271273013640 5ustar liggesusersfastDummies/build/vignette.rds0000644000176200001440000000040414451271273016175 0ustar liggesusersb```b`afd`b2 @8hnbvf^nJinnnQ~y^Pn TR E,(31)'e_JJ1 %~< Un(zT+0]QTTx*% m`# F1 8Y@bPsYsSlPA̜T89 d% 5/N,/Q-zGf/e @]X sL+%$Q/.AfastDummies/tests/0000755000176200001440000000000014444347737013716 5ustar liggesusersfastDummies/tests/spelling.R0000644000176200001440000000024413417630134015637 0ustar liggesusersif(requireNamespace('spelling', quietly = TRUE)) spelling::spell_check_test(vignettes = TRUE, error = FALSE, skip_on_cran = TRUE) fastDummies/tests/testthat/0000755000176200001440000000000014451543216015542 5ustar liggesusersfastDummies/tests/testthat/test-rows-warnings-errors.R0000644000176200001440000000712013533224532022771 0ustar liggesuserscontext("Checks dummy_rows for warnings, errors, and silent") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) error_data <- data.frame(numbers = 1:10, number2 = 11:20, stringsAsFactors = FALSE) test_that("Error on stop conditions", { expect_error(dummy_rows(error_data)) expect_error(dummy_rows(error_data), paste0("No character, factor, or Date columns found.", " Please use select_columns")) }) test_that("There are warnings or errors", { # One column data.frame or vector expect_error(dummy_rows(fastDummies_example$gender)) 
expect_error(dummy_rows(fastDummies_example$dates)) expect_error(dummy_rows(fastDummies_example$numbers)) expect_error(dummy_rows(fastDummies_example[, "gender", drop = FALSE])) expect_error(dummy_rows(fastDummies_example[, "dates", drop = FALSE])) expect_error(dummy_rows(fastDummies_example[, "numbers", drop = FALSE])) expect_error(dummy_rows(fastDummies_example_tibble$gender)) expect_error(dummy_rows(fastDummies_example_tibble$dates)) expect_error(dummy_rows(fastDummies_example_tibble$numbers)) expect_error(dummy_rows(fastDummies_example_tibble[, "gender", drop = FALSE])) expect_error(dummy_rows(fastDummies_example_tibble[, "dates", drop = FALSE])) expect_error(dummy_rows(fastDummies_example_tibble[, "numbers", drop = FALSE])) expect_error(dummy_rows(fastDummies_example_DT$gender)) expect_error(dummy_rows(fastDummies_example_DT$dates)) expect_error(dummy_rows(fastDummies_example_DT$numbers)) expect_error(dummy_rows(fastDummies_example_DT[, "gender", drop = FALSE])) expect_error(dummy_rows(fastDummies_example_DT[, "dates", drop = FALSE])) expect_error(dummy_rows(fastDummies_example_DT[, "numbers", drop = FALSE])) }) test_that("There are no warnings or errors", { expect_silent(dummy_cols(crime)) expect_silent(dummy_cols(crime$city)) expect_silent(dummy_cols(crime$year)) expect_silent(dummy_cols(crime_full)) expect_silent(dummy_cols(fastDummies_example)) expect_silent(dummy_cols(fastDummies_full)) expect_silent(dummy_cols(no_dummies_needed)) expect_silent(dummy_cols(crime_DT)) expect_silent(dummy_cols(crime_DT$city)) expect_silent(dummy_cols(crime_DT$year)) expect_silent(dummy_cols(crime_full_DT)) expect_silent(dummy_cols(fastDummies_example_DT)) expect_silent(dummy_cols(fastDummies_full_DT)) expect_silent(dummy_cols(no_dummies_needed_DT)) expect_silent(dummy_cols(crime_tibble)) expect_silent(dummy_cols(crime_tibble$city)) expect_silent(dummy_cols(crime_tibble$year)) expect_silent(dummy_cols(crime_full_tibble)) expect_silent(dummy_cols(fastDummies_example_tibble)) 
expect_silent(dummy_cols(fastDummies_full_tibble)) expect_silent(dummy_cols(no_dummies_needed_tibble)) expect_silent(dummy_rows(crime)) expect_silent(dummy_rows(crime_full)) expect_silent(dummy_rows(fastDummies_example)) expect_silent(dummy_rows(fastDummies_full)) expect_silent(dummy_rows(no_dummies_needed)) expect_silent(dummy_rows(crime_DT)) expect_silent(dummy_rows(crime_full_DT)) expect_silent(dummy_rows(fastDummies_example_DT)) expect_silent(dummy_rows(fastDummies_full_DT)) expect_silent(dummy_rows(no_dummies_needed_DT)) expect_silent(dummy_rows(crime_tibble)) expect_silent(dummy_rows(crime_full_tibble)) expect_silent(dummy_rows(fastDummies_example_tibble)) expect_silent(dummy_rows(fastDummies_full_tibble)) expect_silent(dummy_rows(no_dummies_needed_tibble)) }) fastDummies/tests/testthat/test-columns-split.R0000644000176200001440000000607313630563351021461 0ustar liggesusersID <- seq(1:4) pets <- c("dog", "cat;dog;mouse", "dog;mouse", "cat") df <- data.frame("ID" = ID, "pets" = pets, stringsAsFactors = FALSE) split_test <- data.frame( Theory = c("Behaviourism", "Behaviourism, Cognitive", "Behaviourism, Gestalt", "Behaviourism, Psychodynamic", "Behaviourism, Psychodynamic, Cognitive"), Format = c("16mm", "16mm, 35mm", "16mm, 35mm, VHS", "16mm, 35mm, VHS", "35mm, VHS")) test_that("split parameter works", { expect_named(dummy_cols(df, split = ";"), c("ID", "pets", "pets_cat", "pets_dog", "pets_mouse")) expect_equal(dummy_cols(df, split = ";")$pets_dog, c(1, 1, 1, 0)) expect_equal(dummy_cols(df, split = ";")$pets_cat, c(0, 1, 0, 1)) expect_equal(dummy_cols(df, split = ";")$pets_mouse, c(0, 1, 1, 0)) expect_named(dummy_cols(split_test, split = ","), c("Theory", "Format", "Theory_Behaviourism", "Theory_Cognitive", "Theory_Gestalt", "Theory_Psychodynamic", "Format_16mm", "Format_35mm", "Format_VHS")) expect_named(dummy_cols(split_test, split = ", "), c("Theory", "Format", "Theory_Behaviourism", "Theory_Cognitive", "Theory_Gestalt", "Theory_Psychodynamic", 
"Format_16mm", "Format_35mm", "Format_VHS")) expect_equal(dummy_cols(split_test, split = ",")$Theory_Behaviourism, c(1, 1, 1, 1, 1)) expect_equal(dummy_cols(split_test, split = ",")$Theory_Cognitive, c(0, 1, 0, 0, 1)) expect_equal(dummy_cols(split_test, split = ",")$Theory_Gestalt, c(0, 0, 1, 0, 0)) expect_equal(dummy_cols(split_test, split = ",")$Theory_Psychodynamic, c(0, 0, 0, 1, 1)) expect_equal(dummy_cols(split_test, split = ",")$Format_16mm, c(1, 1, 1, 1, 0)) expect_equal(dummy_cols(split_test, split = ",")$Format_35mm, c(0, 1, 1, 1, 1)) expect_equal(dummy_cols(split_test, split = ",")$Format_VHS, c(0, 0, 1, 1, 1)) expect_equal(dummy_cols(split_test, split = ", ")$Theory_Behaviourism, c(1, 1, 1, 1, 1)) expect_equal(dummy_cols(split_test, split = ", ")$Theory_Cognitive, c(0, 1, 0, 0, 1)) expect_equal(dummy_cols(split_test, split = ", ")$Theory_Gestalt, c(0, 0, 1, 0, 0)) expect_equal(dummy_cols(split_test, split = ", ")$Theory_Psychodynamic, c(0, 0, 0, 1, 1)) expect_equal(dummy_cols(split_test, split = ", ")$Format_16mm, c(1, 1, 1, 1, 0)) expect_equal(dummy_cols(split_test, split = ", ")$Format_35mm, c(0, 1, 1, 1, 1)) expect_equal(dummy_cols(split_test, split = ", ")$Format_VHS, c(0, 0, 1, 1, 1)) }) fastDummies/tests/testthat/test-rows-right-values.R0000644000176200001440000000715513224606707022256 0ustar liggesuserscontext("dummy_rows_returns right data set") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) test_that("dummy_rows return expected data.frame", { expect_equal(dummy_rows(no_dummies_needed), no_dummies_needed) expect_equal(dummy_rows(no_dummies_needed, select_columns = "animals"), no_dummies_needed) expect_equal(dummy_rows(no_dummies_needed, select_columns = "food"), no_dummies_needed) expect_equal(dummy_rows(no_dummies_needed, select_columns = c("animals", "food")), no_dummies_needed) expect_equal(dummy_rows(no_dummies_needed, dummy_indicator = TRUE), cbind(no_dummies_needed, dummy_indicator = rep(0, 4))) 
expect_equal(dummy_rows(no_dummies_needed, dummy_indicator = TRUE, select_columns = "animals"), cbind(no_dummies_needed, dummy_indicator = rep(0, 4))) expect_equal(dummy_rows(no_dummies_needed, dummy_indicator = TRUE, select_columns = "food"), cbind(no_dummies_needed, dummy_indicator = rep(0, 4))) expect_equal(dummy_rows(no_dummies_needed, dummy_indicator = TRUE, select_columns = c("animals", "food")), cbind(no_dummies_needed, dummy_indicator = rep(0, 4))) # fastDummies_example data - FULL expect_equal(dummy_rows(fastDummies_example), fastDummies_full) expect_equal(dummy_rows(fastDummies_example, select_columns = c("gender", "animals", "dates")), fastDummies_full) expect_equal(dummy_rows(fastDummies_example, dummy_indicator = TRUE), cbind(fastDummies_full, dummy_indicator = c(0, 0, 0, 1, 1, 1, 1, 1))) expect_equal(dummy_rows(fastDummies_example, dummy_indicator = TRUE, select_columns = c("gender", "animals", "dates")), cbind(fastDummies_full, dummy_indicator = c(0, 0, 0, 1, 1, 1, 1, 1))) # fastDummies_example data - not full expect_equal(dummy_rows(fastDummies_example, select_columns = "animals"), fastDummies_example) expect_equal(dummy_rows(fastDummies_example, select_columns = "gender"), fastDummies_example) expect_equal(dummy_rows(fastDummies_example, select_columns = "dates"), fastDummies_example) expect_equal(dummy_rows(fastDummies_example, select_columns = "animals", dummy_indicator = TRUE), cbind(fastDummies_example, dummy_indicator = rep(0, 3))) expect_equal(dummy_rows(fastDummies_example, select_columns = "gender", dummy_indicator = TRUE), cbind(fastDummies_example, dummy_indicator = rep(0, 3))) expect_equal(dummy_rows(fastDummies_example, select_columns = "dates", dummy_indicator = TRUE), cbind(fastDummies_example, dummy_indicator = rep(0, 3))) # Crime dataset expect_equal(dummy_rows(crime, select_columns = c("city", "year")), crime_full) expect_equal(dummy_rows(crime), crime) expect_equal(dummy_rows(crime, select_columns = c("year", "city")), crime_full) 
expect_equal(dummy_rows(crime, select_columns = "city"), crime) expect_equal(dummy_rows(crime, select_columns = "year"), crime) }) fastDummies/tests/testthat/test-omit-colname-prefix.R0000644000176200001440000000277214126414510022522 0ustar liggesuserssample_data <- structure( list( colA = c("a", "a", "a", "b", "b", "c", "c", "c", "c", "c"), colB = c(1, 1, 1, 2, 2, 3, 3, 3, 3, 3), colC = c( "val1", "val2", "val3", "val1", "val2", "val7", "val2", "val4", "val6", "val8" ) ), row.names = c(NA, -10L), class = c("tbl_df", "tbl", "data.frame") ) test_that("omit_colname_prefix works", { expect_named( dummy_cols( sample_data, c("colC"), remove_selected_columns = TRUE, omit_colname_prefix = TRUE ), c( "colA", "colB", "val1", "val2", "val3", "val4", "val6", "val7", "val8" ) ) }) test_that("omit_colname_prefix does not remove prefix when >1 select_columns", { expect_named( dummy_cols( sample_data, c("colB", "colC"), remove_selected_columns = TRUE, omit_colname_prefix = TRUE ), c( "colA", "colB_1", "colB_2", "colB_3", "colC_val1", "colC_val2", "colC_val3", "colC_val4", "colC_val6", "colC_val7", "colC_val8" ) ) }) fastDummies/tests/testthat/test-rows-type.R0000644000176200001440000000306213223024363020605 0ustar liggesuserscontext("Columns don't change type") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) test_that("Columns keep same type", { expect_is(dummy_rows(no_dummies_needed)$animals, "factor") expect_is(dummy_rows(no_dummies_needed, dummy_indicator = TRUE)$animals, "factor") expect_is(dummy_rows(no_dummies_needed, select_columns = "animals", dummy_indicator = TRUE)$animals, "factor") expect_is(dummy_rows(crime)$city, "factor") expect_is(dummy_rows(crime)$year, "numeric") expect_is(dummy_rows(crime)$crime, "integer") expect_is(dummy_rows(crime, dummy_indicator = TRUE)$city, "factor") expect_is(dummy_rows(crime, dummy_indicator = TRUE)$dummy_indicator, "integer") expect_is(dummy_rows(crime, dummy_indicator = TRUE)$year, "numeric") 
expect_is(dummy_rows(crime, select_columns = "city", dummy_indicator = TRUE)$city, "factor") expect_is(dummy_rows(crime, select_columns = "year", dummy_indicator = TRUE)$year, "numeric") expect_is(dummy_rows(fastDummies_example)$dates, "Date") expect_is(dummy_rows(fastDummies_example, select_columns = "dates")$dates, "Date") expect_is(dummy_rows(fastDummies_example, select_columns = "dates", dummy_indicator = TRUE)$dates, "Date") }) fastDummies/tests/testthat/test-columns-dimensions.R0000644000176200001440000002264313570021707022474 0ustar liggesuserscontext("dummy_cols returns proper dimensions") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) test_that("dummy_cols returns same number of rows as inputted", { expect_equal(nrow(dummy_cols(no_dummies_needed)), nrow(no_dummies_needed)) expect_equal(nrow(dummy_cols(crime)), nrow(crime)) expect_equal(nrow(dummy_cols(fastDummies_example)), nrow(fastDummies_example)) # With remove_first_dummy = TRUE expect_equal(nrow(dummy_cols(no_dummies_needed, remove_first_dummy = TRUE)), nrow(no_dummies_needed)) expect_equal(nrow(dummy_cols(crime, remove_first_dummy = TRUE)), nrow(crime)) expect_equal(nrow(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)), nrow(fastDummies_example)) # With select_columns expect_equal(nrow(dummy_cols(no_dummies_needed, select_columns = "animals")), nrow(no_dummies_needed)) expect_equal(nrow(dummy_cols(crime, select_columns = "city")), nrow(crime)) expect_equal(nrow(dummy_cols(crime, select_columns = "crime")), nrow(crime)) expect_equal(nrow(dummy_cols(crime, select_columns = c("crime", "city"))), nrow(crime)) expect_equal(nrow(dummy_cols(fastDummies_example, select_columns = c("numbers", "dates"))), nrow(fastDummies_example)) expect_equal(nrow(dummy_cols(fastDummies_example, select_columns = c("numbers", "gender"))), nrow(fastDummies_example)) expect_equal(nrow(dummy_cols(fastDummies_example, select_columns = "dates")), nrow(fastDummies_example)) }) 
test_that("dummy_cols returns same number of rows as inputted - vector", { expect_equal(nrow(dummy_cols(fastDummies_example$gender)), length(fastDummies_example$gender)) expect_equal(nrow(dummy_cols(fastDummies_example$numbers)), length(fastDummies_example$numbers)) expect_equal(nrow(dummy_cols(fastDummies_example$dates)), length(fastDummies_example$dates)) expect_equal(nrow(dummy_cols(1:100)), 100) expect_equal(nrow(dummy_cols(fastDummies_example[, "gender", drop = FALSE])), nrow(fastDummies_example[, "gender", drop = FALSE])) expect_equal(nrow(dummy_cols(fastDummies_example[, "numbers", drop = FALSE])), nrow(fastDummies_example[, "numbers", drop = FALSE])) expect_equal(nrow(dummy_cols(fastDummies_example[, "dates", drop = FALSE])), nrow(fastDummies_example[, "dates", drop = FALSE])) }) test_that("dummy_cols returns expected number of columns", { expect_equal(ncol(dummy_cols(no_dummies_needed)), 6) expect_equal(ncol(dummy_cols(no_dummies_needed, remove_first_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(no_dummies_needed, select_columns = "animals")), 4) expect_equal(ncol(dummy_cols(no_dummies_needed, select_columns = "animals", remove_first_dummy = TRUE)), 3) expect_equal(ncol(dummy_cols(crime)), 5) expect_equal(ncol(dummy_cols(crime, remove_first_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = "city")), 5) expect_equal(ncol(dummy_cols(crime, select_columns = "city", remove_first_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = "year")), 5) expect_equal(ncol(dummy_cols(crime, select_columns = "year", remove_first_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = c("city", "year"))), 7) expect_equal(ncol(dummy_cols(crime, select_columns = c("city", "year"), remove_first_dummy = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example)), 8) expect_equal(ncol(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)), 6) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = 
"gender")), 6) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "gender", remove_first_dummy = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "dates")), 6) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "dates", remove_first_dummy = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = c("dates", "gender", "numbers"))), 11) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = c("dates", "gender", "numbers"), remove_first_dummy = TRUE)), 8) }) test_that("returns expected number of columns - remove most common", { expect_equal(ncol(dummy_cols(no_dummies_needed, remove_most_frequent_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(no_dummies_needed, select_columns = "animals", remove_most_frequent_dummy = TRUE)), 3) expect_equal(ncol(dummy_cols(crime, remove_most_frequent_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = "city", remove_most_frequent_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = "year", remove_most_frequent_dummy = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = c("city", "year"), remove_most_frequent_dummy = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, remove_most_frequent_dummy = TRUE)), 6) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "gender", remove_most_frequent_dummy = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "dates", remove_most_frequent_dummy = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = c("dates", "gender", "numbers"), remove_most_frequent_dummy = TRUE)), 8) }) test_that("returns expected number of columns - remove selected columns", { expect_equal(ncol(dummy_cols(no_dummies_needed, remove_selected_columns = TRUE)), 4) expect_equal(ncol(dummy_cols(no_dummies_needed, select_columns = "animals", remove_selected_columns = TRUE)), 3) expect_equal(ncol(dummy_cols(crime, 
remove_selected_columns = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = "city", remove_selected_columns = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = "year", remove_selected_columns = TRUE)), 4) expect_equal(ncol(dummy_cols(crime, select_columns = c("city", "year"), remove_selected_columns = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, remove_selected_columns = TRUE)), 6) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "gender", remove_selected_columns = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = "dates", remove_selected_columns = TRUE)), 5) expect_equal(ncol(dummy_cols(fastDummies_example, select_columns = c("dates", "gender", "numbers"), remove_selected_columns = TRUE)), 8) }) test_that("dummy_cols returns expected number of columns - vector ", { expect_equal(ncol(dummy_cols(fastDummies_example$numbers)), 4) expect_equal(ncol(dummy_cols(fastDummies_example$animals)), 3) expect_equal(ncol(dummy_cols(fastDummies_example$dates)), 3) expect_equal(ncol(dummy_cols(fastDummies_example[, "gender", drop = FALSE])), 3) expect_equal(ncol(dummy_cols(fastDummies_example[, "numbers", drop = FALSE])), 4) expect_equal(ncol(dummy_cols(fastDummies_example[, "dates", drop = FALSE])), 3) expect_equal(ncol(dummy_cols(1:100)), 101) }) fastDummies/tests/testthat/test-columns.R0000644000176200001440000003275713760474316020346 0ustar liggesuserscontext("Makes correct dummy columns") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) most_frequent <- data.frame(animal = c("dog", "cat", "cat", "gorilla", "gorilla", "gorilla"), day = c("monday", "tuesday", "wednesday", "wednesday", "friday", "saturday"), hour = 1:6) sort_order_example <- data.frame(numbers = 1:12, month = c("February", "January", "March", "July", "June", "May", "April", "August", "October", "September", "December", "November")) sort_order_example$month <- factor(sort_order_example$month, 
levels = as.character(sort_order_example$month)) sort_order_example2 <- sort_order_example sort_order_example2$month <- as.character(sort_order_example2$month) fastDummies_example2 <- fastDummies_example fastDummies_example2$gender <- as.character(fastDummies_example2$gender) fastDummies_example2$animals <- as.character(fastDummies_example2$animals) test_that("The correct dummy columns are made - default", { expect_named(dummy_cols(sort_order_example), c("numbers", "month", "month_February", "month_January", "month_March", "month_July", "month_June", "month_May", "month_April", "month_August", "month_October", "month_September", "month_December", "month_November")) expect_named(dummy_cols(c("a", "b")), c(".data", ".data_a", ".data_b")) expect_named(dummy_cols(1:2), c(".data", ".data_1", ".data_2")) expect_named(dummy_cols(sort_order_example2), c("numbers", "month", "month_April", "month_August", "month_December", "month_February", "month_January", "month_July", "month_June", "month_March", "month_May", "month_November", "month_October", "month_September")) expect_named(dummy_cols(fastDummies_example), c("numbers", "gender", "animals", "dates", "gender_female", "gender_male", "animals_cat", "animals_dog")) expect_named(dummy_cols(fastDummies_example2), c("numbers", "gender", "animals", "dates", "gender_female", "gender_male", "animals_cat", "animals_dog")) expect_named(dummy_cols(fastDummies_example[, "gender", drop = FALSE]), c("gender", "gender_female", "gender_male")) expect_named(dummy_cols(fastDummies_example[, "animals", drop = FALSE]), c("animals", "animals_cat", "animals_dog")) expect_named(dummy_cols(fastDummies_example2[, "gender", drop = FALSE]), c("gender", "gender_female", "gender_male")) expect_named(dummy_cols(fastDummies_example2[, "animals", drop = FALSE]), c("animals", "animals_cat", "animals_dog")) expect_named(dummy_cols(fastDummies_example[, "numbers", drop = FALSE]), c("numbers", "numbers_1", "numbers_2", "numbers_3")) }) test_that("The correct 
dummy columns are made - select_columns", { expect_named(dummy_cols(fastDummies_example[, "gender", drop = FALSE], select_columns = "gender"), c("gender", "gender_female", "gender_male")) expect_named(dummy_cols(fastDummies_example, select_columns = "numbers"), c("numbers", "gender", "animals", "dates", "numbers_1", "numbers_2", "numbers_3")) expect_named(dummy_cols(fastDummies_example[, "animals", drop = FALSE]), c("animals", "animals_cat", "animals_dog")) # animal first in select_columns expect_named(dummy_cols(fastDummies_example, select_columns = c("animals", "gender")), c("numbers", "gender", "animals", "dates", "animals_cat", "animals_dog", "gender_female", "gender_male")) # gender first in select_columns expect_named(dummy_cols(fastDummies_example, select_columns = c("gender", "animals")), c("numbers", "gender", "animals", "dates", "gender_female", "gender_male", "animals_cat", "animals_dog")) expect_named(dummy_cols(fastDummies_example, select_columns = "animals"), c("numbers", "gender", "animals", "dates", "animals_cat", "animals_dog")) expect_named(dummy_cols(fastDummies_example, select_columns = "gender"), c("numbers", "gender", "animals", "dates", "gender_female", "gender_male")) expect_named(dummy_cols(fastDummies_example, select_columns = c("gender", "numbers")), c("numbers", "gender", "animals", "dates", "gender_female", "gender_male", "numbers_1", "numbers_2", "numbers_3")) }) test_that("Remove first dummy leads to proper dummy columns being made", { expect_named(dummy_cols(fastDummies_example[, "gender", drop = FALSE], remove_first_dummy = TRUE), c("gender", "gender_male")) expect_named(dummy_cols(fastDummies_example[, "numbers", drop = FALSE], remove_first_dummy = TRUE), c("numbers", "numbers_2", "numbers_3")) expect_named(dummy_cols(fastDummies_example[, "animals", drop = FALSE], remove_first_dummy = TRUE), c("animals", "animals_dog")) expect_named(dummy_cols(fastDummies_example, remove_first_dummy = TRUE), c("numbers", "gender", "animals", 
"dates", "gender_male", "animals_dog")) expect_named(dummy_cols(fastDummies_example, select_columns = c("gender", "animals"), remove_first_dummy = TRUE), c("numbers", "gender", "animals", "dates", "gender_male", "animals_dog")) expect_named(dummy_cols(fastDummies_example, select_columns = "gender", remove_first_dummy = TRUE), c("numbers", "gender", "animals", "dates", "gender_male")) expect_named(dummy_cols(fastDummies_example, select_columns = "animals", remove_first_dummy = TRUE), c("numbers", "gender", "animals", "dates", "animals_dog")) expect_named(dummy_cols(fastDummies_example, select_columns = "numbers", remove_first_dummy = TRUE), c("numbers", "gender", "animals", "dates", "numbers_2", "numbers_3")) expect_named(dummy_cols(fastDummies_example, select_columns = c("animals", "numbers"), remove_first_dummy = TRUE), c("numbers", "gender", "animals", "dates", "animals_dog", "numbers_2", "numbers_3")) }) test_that("remove_most_frequent_dummy works", { expect_named(dummy_cols(.data = data.frame(X = as.factor(c("a", "b", "b", "c", "c"))), remove_most_frequent_dummy = TRUE), c("X", "X_a", "X_c")) expect_named(dummy_cols(most_frequent, remove_most_frequent_dummy = TRUE), c("animal", "day", "hour", "animal_cat", "animal_dog", "day_friday", "day_monday", "day_saturday", "day_tuesday")) expect_named(dummy_cols(most_frequent, select_columns = c("animal", "day"), remove_most_frequent_dummy = TRUE), c("animal", "day", "hour", "animal_cat", "animal_dog", "day_friday", "day_monday", "day_saturday", "day_tuesday")) expect_named(dummy_cols(most_frequent, select_columns = "animal", remove_most_frequent_dummy = TRUE), c("animal", "day", "hour", "animal_cat", "animal_dog")) expect_named(dummy_cols(most_frequent, select_columns = "day", remove_most_frequent_dummy = TRUE), c("animal", "day", "hour", "day_friday", "day_monday", "day_saturday", "day_tuesday")) expect_named(dummy_cols(most_frequent, select_columns = "hour", remove_most_frequent_dummy = TRUE), c("animal", "day", 
"hour", "hour_2", "hour_3", "hour_4", "hour_5", "hour_6")) }) test_that("remove_selected_columns works", { expect_named(dummy_cols(most_frequent, remove_selected_columns = TRUE), c("hour", "animal_cat", "animal_dog", "animal_gorilla", "day_friday", "day_monday", "day_saturday", "day_tuesday", "day_wednesday")) expect_named(dummy_cols(most_frequent, select_columns = c("animal", "day"), remove_selected_columns = TRUE), c("hour", "animal_cat", "animal_dog", "animal_gorilla", "day_friday", "day_monday", "day_saturday", "day_tuesday", "day_wednesday")) expect_named(dummy_cols(most_frequent, select_columns = "animal", remove_selected_columns = TRUE), c("day", "hour", "animal_cat", "animal_dog", "animal_gorilla")) expect_named(dummy_cols(most_frequent, select_columns = "day", remove_selected_columns = TRUE), c("animal", "hour", "day_friday", "day_monday", "day_saturday", "day_tuesday", "day_wednesday")) expect_named(dummy_cols(most_frequent, select_columns = "hour", remove_selected_columns = TRUE), c( "animal", "day", "hour_1", "hour_2", "hour_3", "hour_4", "hour_5", "hour_6")) }) fastDummies/tests/testthat/test-columns-warnings-errors.R0000644000176200001440000002324013515737037023471 0ustar liggesuserscontext("Checks dummy_cols for warnings and errors") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) error_data <- data.frame(numbers = 1:10, number2 = 11:20, stringsAsFactors = FALSE) test <- structure(list( Theory = structure(c(NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, NA, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 6L, 6L, 6L, 9L, 9L, NA, 1L, 1L, NA, NA, NA, NA, NA, 1L, 1L, 6L, NA, 1L, 1L, 1L, NA, NA, 1L, 1L, NA, 2L, NA, 1L, 1L, 4L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, NA, NA, 
NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, 1L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, NA, NA, 1L, NA, NA, 2L, NA, NA, NA, NA, 9L, 9L, 1L, 1L, 1L, 6L, 6L, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, 1L, 1L, 1L, 8L, 1L, NA, 6L, 1L, 1L, 1L, NA, NA, NA, NA, NA, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 3L, 1L, 1L, 1L, NA, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 5L, 1L, 1L, 1L, 1L, NA, 1L, 8L, NA, 8L, 8L, NA, NA, NA, NA, 2L, 1L, 2L, 10L, 1L, 1L, 1L, 1L, 1L, NA, NA, NA, 6L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, NA, NA, NA, NA, NA, NA, 1L, NA, 9L, NA, NA, NA, 1L, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, NA, 1L, NA, 1L, NA, 1L, 1L, 1L, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, NA, 1L, 1L, NA, 9L, 9L, 9L, 9L, 9L, 9L, 1L, 1L, 1L, 1L, 2L, NA, NA, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 6L, 6L, 6L, 6L, 7L, NA, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, NA, 1L, 1L, NA, 1L, 1L, 1L, 1L, 1L), .Label = c("Behaviourism", "Behaviourism, Cognitive", "Behaviourism, Gestalt", "Behaviourism, Psychodynamic", "Behaviourism, Psychodynamic, Cognitive", "Cognitive", "Functionalism", "Gestalt", "Psychodynamic", "Structuralism"), class = "factor"), Format = structure(c(1L, 1L, 24L, 1L, 1L, 1L, 1L, 2L, 1L, 10L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 1L, 1L, 2L, 1L, 1L, 19L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 3L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 12L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 11L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 
1L, 15L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 13L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 7L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 8L, 1L, 1L, 1L, 1L, 1L, 5L, 1L, 1L, 1L, 1L, 7L, 1L, 1L, 15L, 1L, 5L, 25L, 5L, 24L, 2L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 1L, 1L, 1L, 1L, 20L, 1L, 18L, 12L, 1L, 1L, NA, 20L, 20L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 25L, 15L, 16L, 15L, 15L, 1L, 1L, 1L, 1L, 19L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 12L, 5L, 5L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 14L, 1L, 1L, 1L, 1L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 15L, 12L, NA, 15L, 1L, NA, NA, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 14L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 2L, 2L, 2L, 2L, 2L, 1L, 22L, 1L, 21L, 23L, 5L, 1L, 1L, 1L, 1L, 10L, 1L, 1L, 1L, 1L, 5L, 17L, 1L, 17L, 6L, 1L, 1L, 9L, 1L, 1L, 1L, 1L, 1L, 1L, 2L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 12L, 1L, 18L, 1L, 21L, 18L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 5L, 12L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 24L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 1L, 6L, 1L, 1L, 1L, 1L, 1L, 4L, 1L, 1L, 1L), .Label = c("16mm", "16mm, 35mm", "16mm, 35mm, VHS", "16mm, AVI", "16mm, Digital", "16mm, DVD", "16mm, DVD, Betacam SP", "16mm, DVD, Digital, Betacam SP", "16mm, DVD, Mini-DV", "16mm, MP4", "16mm, MPG", "16mm, VHS", "16mm, VHS, AVI", "16mm, VHS, Digital", "16mm, VHS, DVD", "16mm, VHS, DVD, Digital, AVI", "35mm", "8mm", "8mm, 16mm", "DVD", "DVD, AVI", "Mini-DV", "MPG", "VHS", "VHS, DVD, Digital"), class = "factor")), .Names = c("Theory", "Format"), row.names = c(NA, -427L), class = c("tbl_df", "tbl", "data.frame")) test_that("Error on stop conditions", { expect_error(dummy_cols(error_data)) expect_error(dummy_cols(error_data), paste0("No character or factor columns found. 
", "Please use select_columns to choose columns.")) }) test_that("Including non-existing in select_columns leads to warning", { expect_warning(dummy_cols(fastDummies_example[, "gender", drop = FALSE], select_columns = c("gender", "fake"))) expect_warning(dummy_cols(fastDummies_example[, "gender", drop = FALSE], select_columns = c("fake", "gender"))) expect_warning(dummy_cols(fastDummies_example[, "gender", drop = FALSE], select_columns = c("fake", "gender", "fake"))) }) test_that("Only having non-existing column in select_columns returns error", { expect_error(dummy_cols(fastDummies_example, select_columns = "number")) expect_error(dummy_cols(fastDummies_example[, "numbers", drop = FALSE], select_columns = "number")) expect_error(dummy_cols(fastDummies_example[, "gender", drop = FALSE], select_columns = "gen")) expect_error(dummy_cols(fastDummies_example, select_columns = "")) expect_error(dummy_cols(no_dummies_needed, select_columns = "")) expect_error(dummy_cols(crime, select_columns = "")) expect_error(dummy_cols(fastDummies_example[, "gender", drop = FALSE], select_columns = "")) }) test_that("no errors or warnings", { expect_silent(dummy_cols(fastDummies_example)) expect_silent(dummy_cols(no_dummies_needed)) expect_silent(dummy_cols(no_dummies_needed_DT)) expect_silent(dummy_cols(no_dummies_needed_tibble)) expect_silent(dummy_cols(crime)) expect_silent(dummy_cols(crime_full)) expect_silent(dummy_cols(crime_DT)) expect_silent(dummy_cols(crime_full_DT)) expect_silent(dummy_cols(crime_tibble)) expect_silent(dummy_cols(crime_full_tibble)) expect_silent(dummy_cols(fastDummies_example)) expect_silent(dummy_cols(fastDummies_example_DT)) expect_silent(dummy_cols(fastDummies_example_tibble)) expect_silent(dummy_cols(fastDummies_full)) expect_silent(dummy_cols(fastDummies_full_DT)) expect_silent(dummy_cols(fastDummies_full_tibble)) expect_silent(dummy_cols(test, select_columns = "Theory", split = ", ")) expect_silent(dummy_cols(test, select_columns = "Theory", split = 
",")) }) test_that("error if both remove options are true", { expect_error(dummy_cols(no_dummies_needed, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(no_dummies_needed_DT, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(no_dummies_needed_tibble, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(crime, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(crime_full, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(crime_DT, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(crime_full_DT, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(crime_tibble, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(crime_full_tibble, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(fastDummies_example, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(fastDummies_example_DT, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(fastDummies_example_tibble, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(fastDummies_full, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(fastDummies_full_DT, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) expect_error(dummy_cols(fastDummies_full_tibble, remove_first_dummy = TRUE, remove_most_frequent_dummy = TRUE)) }) fastDummies/tests/testthat/test-rows-indicator-values.R0000644000176200001440000000432613224606712023106 0ustar liggesuserscontext("dummy_indicator is only integers 0 and 1") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) test_that("dummy_indicator is binary column", { # With dummy_indicator TRUE expect_true( 
all(unique(dummy_rows(no_dummies_needed,
                      dummy_indicator = TRUE)$dummy_indicator) %in% 0)
  )
  # The check above is reduced with all(), like every other expectation in
  # this test, so expect_true() always receives a single TRUE/FALSE.
  expect_true(all(unique(dummy_rows(fastDummies_example,
                                    dummy_indicator = TRUE)$dummy_indicator) %in% 0:1))
  expect_true(all(unique(dummy_rows(crime,
                                    dummy_indicator = TRUE)$dummy_indicator) %in% 0:1))

  # with set dummy_value
  expect_true(all(unique(dummy_rows(no_dummies_needed,
                                    dummy_indicator = TRUE,
                                    dummy_value = "test")$dummy_indicator) %in% 0:1))
  expect_true(all(unique(dummy_rows(fastDummies_example,
                                    dummy_indicator = TRUE,
                                    dummy_value = "test")$dummy_indicator) %in% 0:1))
  expect_true(all(unique(dummy_rows(crime,
                                    dummy_indicator = TRUE,
                                    dummy_value = "test")$dummy_indicator) %in% 0:1))

  # With columns selected TRUE
  expect_true(all(unique(dummy_rows(no_dummies_needed,
                                    dummy_indicator = TRUE,
                                    select_columns = "animals")$dummy_indicator) %in% 0))
  expect_true(all(unique(dummy_rows(fastDummies_example,
                                    dummy_indicator = TRUE,
                                    select_columns = "dates")$dummy_indicator) %in% 0))
  expect_true(all(unique(dummy_rows(fastDummies_example,
                                    dummy_indicator = TRUE,
                                    select_columns = "animals")$dummy_indicator) %in% 0))
  expect_true(all(unique(dummy_rows(crime,
                                    dummy_indicator = TRUE,
                                    select_columns = "crime")$dummy_indicator) %in% 0:1))
})
fastDummies/tests/testthat/test-columns-value-order.R0000644000176200001440000003063213760474244022557 0ustar liggesuserscontext("Order of dummy column values are right")

load(system.file("testdata", "fastDummies_data.rda",
                 package = "fastDummies"))

# Fixture for the split tests: one cell holds several comma-separated pets.
split_example <- data.frame(owner = 1:4,
                            pets = c("dog", "dog, cat, hamster",
                                     "cat", "hamster"),
                            stringsAsFactors = FALSE)
# Fixture with unsorted numeric values, used to check dummy column ordering.
numeric_order <- data.frame(photos = c(1, 5, 7, 2, 40, 23, 12, 6, 1))

test_that("Order of dummy columns (e.g.
0,0,1,0) is right", { expect_equal(dummy_cols(1:3)$.data_1, c(1, 0, 0)) expect_equal(dummy_cols(1:3)$.data_2, c(0, 1, 0)) expect_equal(dummy_cols(1:3)$.data_3, c(0, 0, 1)) expect_equal(dummy_cols(c("a", "b", "c"))$.data_a, c(1, 0, 0)) expect_equal(dummy_cols(c("a", "b", "c"))$.data_b, c(0, 1, 0)) expect_equal(dummy_cols(c("a", "b", "c"))$.data_c, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example)$gender_female, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example)$gender_female, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender")$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender", remove_first_dummy = TRUE)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example)$animals_cat, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals", remove_first_dummy = TRUE)$animals_dog, c(1, 1, 0)) expect_named(dummy_cols(numeric_order), c("photos", "photos_1", "photos_2", "photos_5", "photos_6", "photos_7", "photos_12", "photos_23", "photos_40")) # Splitter test expect_equal(dummy_cols(split_example, split = ",")$pets_dog, c(1, 1, 0, 0)) expect_equal(dummy_cols(split_example, split = ",")$pets_cat, c(0, 1, 1, 0)) expect_equal(dummy_cols(split_example, split = ",")$pets_hamster, c(0, 1, 0, 1)) expect_equal(dummy_cols(fastDummies_example_DT)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_DT)$gender_female, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender")$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_DT, remove_first_dummy = TRUE)$gender_male, c(1, 
1, 0)) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender", remove_first_dummy = TRUE)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_DT)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_DT)$animals_cat, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example_DT, remove_first_dummy = TRUE)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals", remove_first_dummy = TRUE)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble)$gender_female, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender")$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble, remove_first_dummy = TRUE)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender", remove_first_dummy = TRUE)$gender_male, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble)$animals_cat, c(0, 0, 1)) expect_equal(dummy_cols(fastDummies_example_tibble, remove_first_dummy = TRUE)$animals_dog, c(1, 1, 0)) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals", remove_first_dummy = TRUE)$animals_dog, c(1, 1, 0)) }) test_that("Order of non-dummy columns is same", { gender_list <- factor(c("male", "male", "female")) animals_list <- factor(c("dog", "dog", "cat")) expect_equal(dummy_cols(fastDummies_example)$numbers, fastDummies_example$numbers) expect_equal(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$numbers, fastDummies_example$numbers) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender")$numbers, fastDummies_example$numbers) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals")$numbers, fastDummies_example$numbers) 
expect_equal(dummy_cols(fastDummies_example, select_columns = "gender", remove_first_dummy = TRUE)$numbers, fastDummies_example$numbers) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals", remove_first_dummy = TRUE)$numbers, fastDummies_example$numbers) expect_equal(dummy_cols(fastDummies_example)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender")$gender, gender_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals")$gender, gender_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender", remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals", remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender")$animals, animals_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals")$animals, animals_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "gender", remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example, select_columns = "animals", remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_DT)$numbers, fastDummies_example_DT$numbers) expect_equal(dummy_cols(fastDummies_example_DT, remove_first_dummy = TRUE)$numbers, fastDummies_example_DT$numbers) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender")$numbers, fastDummies_example_DT$numbers) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals")$numbers, fastDummies_example_DT$numbers) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender", remove_first_dummy = 
TRUE)$numbers, fastDummies_example_DT$numbers) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals", remove_first_dummy = TRUE)$numbers, fastDummies_example_DT$numbers) expect_equal(dummy_cols(fastDummies_example_DT)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_DT, remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender")$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals")$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender", remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals", remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_DT)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_DT, remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender")$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals")$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "gender", remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_DT, select_columns = "animals", remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_tibble)$numbers, fastDummies_example_tibble$numbers) expect_equal(dummy_cols(fastDummies_example_tibble, remove_first_dummy = TRUE)$numbers, fastDummies_example_tibble$numbers) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender")$numbers, fastDummies_example_tibble$numbers) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals")$numbers, fastDummies_example_tibble$numbers) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender", remove_first_dummy = TRUE)$numbers, 
fastDummies_example_tibble$numbers) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals", remove_first_dummy = TRUE)$numbers, fastDummies_example_tibble$numbers) expect_equal(dummy_cols(fastDummies_example_tibble)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_tibble, remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender")$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals")$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender", remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals", remove_first_dummy = TRUE)$gender, gender_list) expect_equal(dummy_cols(fastDummies_example_tibble)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_tibble, remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender")$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals")$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "gender", remove_first_dummy = TRUE)$animals, animals_list) expect_equal(dummy_cols(fastDummies_example_tibble, select_columns = "animals", remove_first_dummy = TRUE)$animals, animals_list) }) fastDummies/tests/testthat/test-rows-dimensions.R0000644000176200001440000001104713313007342021774 0ustar liggesuserscontext("dummy_rows returns proper dimensions") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) test_that("dummy_rows returns same number of columns as inputted", { expect_equal(ncol(dummy_rows(no_dummies_needed)), ncol(no_dummies_needed)) expect_equal(ncol(dummy_rows(fastDummies_example)), ncol(fastDummies_example)) expect_equal(ncol(dummy_rows(crime)), ncol(crime)) 
expect_equal(ncol(dummy_rows(no_dummies_needed_DT)), ncol(no_dummies_needed_DT)) expect_equal(ncol(dummy_rows(fastDummies_example_DT)), ncol(fastDummies_example_DT)) expect_equal(ncol(dummy_rows(crime_DT)), ncol(crime_DT)) expect_equal(ncol(dummy_rows(no_dummies_needed_tibble)), ncol(no_dummies_needed_tibble)) expect_equal(ncol(dummy_rows(fastDummies_example_tibble)), ncol(fastDummies_example_tibble)) expect_equal(ncol(dummy_rows(crime_tibble)), ncol(crime_tibble)) # With dummy_indicator TRUE expect_equal(ncol(dummy_rows(no_dummies_needed, dummy_indicator = TRUE)), ncol(no_dummies_needed) + 1) expect_equal(ncol(dummy_rows(fastDummies_example, dummy_indicator = TRUE)), ncol(fastDummies_example) + 1) expect_equal(ncol(dummy_rows(crime, dummy_indicator = TRUE)), ncol(crime) + 1) expect_equal(ncol(dummy_rows(no_dummies_needed_DT, dummy_indicator = TRUE)), ncol(no_dummies_needed_DT) + 1) expect_equal(ncol(dummy_rows(fastDummies_example_DT, dummy_indicator = TRUE)), ncol(fastDummies_example_DT) + 1) expect_equal(ncol(dummy_rows(crime_DT, dummy_indicator = TRUE)), ncol(crime_DT) + 1) expect_equal(ncol(dummy_rows(no_dummies_needed_tibble, dummy_indicator = TRUE)), ncol(no_dummies_needed_tibble) + 1) expect_equal(ncol(dummy_rows(fastDummies_example_tibble, dummy_indicator = TRUE)), ncol(fastDummies_example_tibble) + 1) expect_equal(ncol(dummy_rows(crime_tibble, dummy_indicator = TRUE)), ncol(crime_tibble) + 1) }) test_that("Number of rows is as expected", { expect_equal(nrow(dummy_rows(no_dummies_needed)), 4) expect_equal(nrow(dummy_rows(fastDummies_example)), 8) expect_equal(nrow(dummy_rows(crime)), 3) expect_equal(nrow(dummy_rows(crime, select_columns = c("city", "year"))), 4) expect_equal(nrow(dummy_rows(no_dummies_needed_DT)), 4 ) expect_equal(nrow(dummy_rows(fastDummies_example_DT)), 8) expect_equal(nrow(dummy_rows(crime_DT)), 3) expect_equal(nrow(dummy_rows(crime_DT, select_columns = c("city", "year"))), 4) expect_equal(nrow(dummy_rows(no_dummies_needed_tibble)), 4 ) 
expect_equal(nrow(dummy_rows(fastDummies_example_tibble)), 8) expect_equal(nrow(dummy_rows(crime_tibble)), 3) expect_equal(nrow(dummy_rows(crime_tibble, select_columns = c("city", "year"))), 4) # With dummy_indicator TRUE expect_equal(nrow(dummy_rows(no_dummies_needed, dummy_indicator = TRUE)), 4) expect_equal(nrow(dummy_rows(fastDummies_example, dummy_indicator = TRUE)), 8) expect_equal(nrow(dummy_rows(crime, dummy_indicator = TRUE)), 3) expect_equal(nrow(dummy_rows(crime, dummy_indicator = TRUE, select_columns = c("city", "year"))), 4) expect_equal(nrow(dummy_rows(no_dummies_needed_DT, dummy_indicator = TRUE)), 4) expect_equal(nrow(dummy_rows(fastDummies_example_DT, dummy_indicator = TRUE)), 8) expect_equal(nrow(dummy_rows(crime_DT, dummy_indicator = TRUE)), 3) expect_equal(nrow(dummy_rows(crime_DT, dummy_indicator = TRUE, select_columns = c("city", "year"))), 4) expect_equal(nrow(dummy_rows(no_dummies_needed_tibble, dummy_indicator = TRUE)), 4) expect_equal(nrow(dummy_rows(fastDummies_example_tibble, dummy_indicator = TRUE)), 8) expect_equal(nrow(dummy_rows(crime_tibble, dummy_indicator = TRUE)), 3) expect_equal(nrow(dummy_rows(crime_tibble, dummy_indicator = TRUE, select_columns = c("city", "year"))), 4) }) fastDummies/tests/testthat/test-columns-type.R0000644000176200001440000000306513760474162021312 0ustar liggesuserscontext("Original Columns don't change type") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) fastDummies_example_character <- fastDummies_example fastDummies_example_character$animals <- as.character(fastDummies_example_character$animals) test_that("Original columns keep same type", { expect_is(dummy_cols(fastDummies_example)$numbers, "integer") expect_is(dummy_cols(fastDummies_example)$animals, "factor") expect_is(dummy_cols(fastDummies_example_character)$animals, "character") expect_is(dummy_cols(fastDummies_example)$dates, "Date") expect_is(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$numbers, 
"integer") expect_is(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$animals, "factor") expect_is(dummy_cols(fastDummies_example, remove_first_dummy = TRUE)$dates, "Date") expect_is(dummy_cols(c("a", "b", "c"))$.data, "character") expect_is(dummy_cols(c(1.1, 1.2, 1.3))$.data, "numeric") }) test_that("New columns are integer", { expect_is(dummy_cols(fastDummies_example)$gender_male, "integer") expect_is(dummy_cols(fastDummies_example, select_columns = "numbers")$numbers_1, "integer") expect_is(dummy_cols(fastDummies_example, select_columns = "dates")[, "dates_2012-01-01"], "integer") expect_is(dummy_cols(c("a", "b", "c"))$.data_a, "integer") expect_is(dummy_cols(c(1.1, 1.2, 1.3))$.data_1.1, "integer") }) fastDummies/tests/testthat/test-ignore_na.R0000644000176200001440000000571114444355170020611 0ustar liggesuserscontext("test-ignore_na") na_test <- data.frame(numbers = 1:5, animals = c("cat", "dog", NA, "dog", NA), stringsAsFactors = FALSE) most_frequent <- data.frame(animal = c("dog", NA, "cat", NA, "gorilla", "gorilla"), day = c("monday", "tuesday", "wednesday", "wednesday", "friday", "saturday"), hour = 1:6) test_that("ignore-na parameter works", { expect_named(dummy_cols(na_test), c("numbers", "animals", "animals_cat", "animals_dog", "animals_NA")) expect_named(dummy_cols(na_test, ignore_na = TRUE), c("numbers", "animals", "animals_cat", "animals_dog")) expect_named(dummy_cols(most_frequent, select_columns = "animal"), c("animal", "day", "hour", "animal_cat", "animal_dog", "animal_gorilla", "animal_NA")) expect_named(dummy_cols(most_frequent, select_columns = "animal", ignore_na = TRUE), c("animal", "day", "hour", "animal_cat", "animal_dog", "animal_gorilla")) expect_equal(dummy_cols(most_frequent, select_columns = "animal")$animal_cat, c(0, NA, 1, NA ,0 ,0)) expect_equal(dummy_cols(most_frequent, ignore_na = TRUE)$animal_cat, c(0, NA, 1, NA ,0 ,0)) expect_equal(dummy_cols(most_frequent, select_columns = "animal")$animal_gorilla, c(0, NA, 0, NA, 1, 
1)) expect_equal(dummy_cols(most_frequent, ignore_na = TRUE)$animal_gorilla, c(0, NA, 0, NA, 1, 1)) expect_equal(dummy_cols(most_frequent, select_columns = "animal")$animal_NA, c(0, 1, 0, 1, 0, 0)) expect_equal(dummy_cols(na_test)$animals_cat, c(1, 0, NA, 0, NA)) expect_equal(dummy_cols(na_test)$animals_dog, c(0, 1, NA, 1, NA)) expect_equal(dummy_cols(na_test)$animals_NA, c(0, 0, 1, 0, 1)) expect_equal(dummy_cols(na_test, ignore_na = TRUE)$animals_cat, c(1, 0, NA, 0, NA)) expect_equal(dummy_cols(na_test, ignore_na = TRUE)$animals_dog, c(0, 1, NA, 1, NA)) }) fastDummies/tests/testthat/test-return-type.R0000644000176200001440000000573713533224451021151 0ustar liggesuserscontext("test-return-type") load(system.file("testdata", "fastDummies_data.rda", package = "fastDummies")) test_that("tibble input returns tibble", { expect_is(dummy_cols(tibble::as_tibble(crime)), "tbl_df") expect_is(dummy_cols(tibble::as_tibble(crime[, "city", drop = FALSE])), "tbl_df") expect_is(dummy_cols(tibble::as_tibble(crime[, "year", drop = FALSE])), "tbl_df") expect_is(dummy_cols(tibble::as_tibble(crime_full)), "tbl_df") expect_is(dummy_cols(tibble::as_tibble(fastDummies_example)), "tbl_df") expect_is(dummy_cols(tibble::as_tibble(fastDummies_full)), "tbl_df") expect_is(dummy_cols(tibble::as_tibble(no_dummies_needed)), "tbl_df") expect_is(dummy_rows(tibble::as_tibble(crime)), "tbl_df") expect_is(dummy_rows(tibble::as_tibble(crime_full)), "tbl_df") expect_is(dummy_rows(tibble::as_tibble(fastDummies_example)), "tbl_df") expect_is(dummy_rows(tibble::as_tibble(fastDummies_full)), "tbl_df") expect_is(dummy_rows(tibble::as_tibble(no_dummies_needed)), "tbl_df") }) test_that("data.frame input returns data.frame", { expect_is(dummy_cols(crime), "data.frame") expect_is(dummy_cols(crime$city), "data.frame") expect_is(dummy_cols(crime$year), "data.frame") expect_is(dummy_cols(crime_full), "data.frame") expect_is(dummy_cols(fastDummies_example), "data.frame") expect_is(dummy_cols(fastDummies_full), 
"data.frame") expect_is(dummy_cols(no_dummies_needed), "data.frame") expect_is(dummy_rows(crime), "data.frame") expect_is(dummy_rows(crime_full), "data.frame") expect_is(dummy_rows(fastDummies_example), "data.frame") expect_is(dummy_rows(fastDummies_full), "data.frame") expect_is(dummy_rows(no_dummies_needed), "data.frame") }) test_that("data.table input returns data.table", { expect_is(dummy_cols(data.table::as.data.table(crime)), "data.table") expect_is(dummy_cols(data.table::as.data.table(crime$city)), "data.table") expect_is(dummy_cols(data.table::as.data.table(crime$year)), "data.table") expect_is(dummy_cols(data.table::as.data.table(crime_full)), "data.table") expect_is(dummy_cols(data.table::as.data.table(fastDummies_example)), "data.table") expect_is(dummy_cols(data.table::as.data.table(fastDummies_full)), "data.table") expect_is(dummy_cols(data.table::as.data.table(no_dummies_needed)), "data.table") expect_is(dummy_rows(data.table::as.data.table(crime)), "data.table") expect_is(dummy_rows(data.table::as.data.table(crime_full)), "data.table") expect_is(dummy_rows(data.table::as.data.table(fastDummies_example)), "data.table") expect_is(dummy_rows(data.table::as.data.table(fastDummies_full)), "data.table") expect_is(dummy_rows(data.table::as.data.table(no_dummies_needed)), "data.table") }) fastDummies/tests/testthat.R0000644000176200001440000000010613220107164015651 0ustar liggesuserslibrary(testthat) library(fastDummies) test_check("fastDummies") fastDummies/vignettes/0000755000176200001440000000000014451271273014551 5ustar liggesusersfastDummies/vignettes/making-dummy-variables.Rmd0000644000176200001440000001003613223763615021564 0ustar liggesusers--- title: "Making dummy variables with dummy_cols()" author: "Jacob Kaplan" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Making dummy variables with dummy_cols()} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Dummy variables (or binary variables) are 
commonly used in statistical analyses and in simpler descriptive statistics. A dummy column is one which has a value of one when a categorical event occurs and a zero when it doesn't occur. In most cases this is a feature of the event/person/object being described. For example, if the dummy variable was for occupation being an R programmer, you can ask, "is this person an R programmer?" When the answer is yes, they get a value of 1, when it is no, they get a value of 0. We'll start with a simple example and then go into using the function `dummy_cols()`. You can also use the function `dummy_columns()` which is identical to `dummy_cols()`. Imagine you have a data set about animals in a local shelter. One of the columns in your data is what animal it is: dog or cat. ```{r echo=FALSE} knitr::kable(data.frame(animals = c("dog", "dog", "cat"))) ``` To make dummy columns from this data, you would need to produce two new columns. One would indicate if the animal is a dog, and the other would indicate if the animal is a cat. Each row would get a value of 1 in the column indicating which animal they are, and 0 in the other column. animals | dog | cat --- | --- | --- dog | 1 | 0 dog | 1 | 0 cat | 0 | 1 In the function dummy_cols, the names of these new columns are concatenated to the original column and separated by an underscore. animals | animals_dog | animals_cat --- | --- | --- dog | 1 | 0 dog | 1 | 0 cat | 0 | 1 With an example like this, it is fairly easy to make the dummy columns yourself. `dummy_cols()` automates the process, and is useful when you have many columns to generate dummy variables from or many categories within a column. 
```{r setup, echo=TRUE} fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01")), stringsAsFactors = FALSE) knitr::kable(fastDummies_example) ``` The object **fastDummies_example** has two character type columns, one integer column, and a Date column. By default, `dummy_cols()` will make dummy variables from factor or character columns only. This is because in most cases those are the only types of data you want dummy variables from. If those are the only columns you want, then the function takes your data set as the first parameter and returns a data.frame with the newly created variables appended to the end of the original data. ```{r echo=TRUE} results <- fastDummies::dummy_cols(fastDummies_example) knitr::kable(results) ``` In some situations, you would want columns with types other than factor and character to generate dummy variables. For example, a column of years would be numeric but could be well-suited for making into dummy variables depending on your analysis. Use the *select_columns* parameter to select specific columns to make dummy variables from. ```{r echo=TRUE} results <- fastDummies::dummy_cols(fastDummies_example, select_columns = "numbers") knitr::kable(results) ``` Another option for `dummy_cols()` is *remove_first_dummy* which by default is FALSE. If TRUE, it removes the first dummy variable created from each column. This is done to avoid multicollinearity in a multiple regression model caused by including all dummy variables. The "first" dummy variable is the first category in sorted order (the first factor level for factor columns, otherwise the first value alphabetically), not the value in the first row. 
```{r echo=TRUE} results <- fastDummies::dummy_cols(fastDummies_example, remove_first_dummy = TRUE) knitr::kable(results) ``` fastDummies/vignettes/making-dummy-rows.Rmd0000644000176200001440000000722313223763627020615 0ustar liggesusers--- title: "Making dummy rows with dummy_rows()" author: "Jacob Kaplan" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Making dummy rows with dummy_rows()} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- When dealing with data, there are often missing rows. While truly handling missing data is far beyond the scope of this package, the function `dummy_rows()` lets you add those missing rows back into the data. The function takes all character, factor, and Date columns, finds all possible combinations of their values, and adds the rows that are not in the original data set. Any columns not used in creating the combinations (e.g. numeric) are given a value of NA (unless otherwise specified with *dummy_value*). Let's start with a simple example. ```{r echo=TRUE} fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01")), stringsAsFactors = FALSE) knitr::kable(fastDummies_example) ``` This data set has four columns: two character, one Date, and one numeric. The function by default will use the character and Date columns in creating the combinations. First, a small amount of math to explain the combinations. Each column has two distinct values - gender: male & female; animals: dog & cat; dates: 2011-12-31 & 2012-01-01. To find the number of possible combinations, multiply the number of unique values in each column together. 2 \* 2 \* 2 = 8. 
```{r echo=TRUE} results <- fastDummies::dummy_rows(fastDummies_example) knitr::kable(results) ``` When we run the function we can see that there are indeed 8 rows possible, and that the 5 rows missing from the original data have been added. To explicitly see which rows are new, set the *dummy_indicator* parameter to TRUE. This provides a column called dummy_indicator with a value of 0 if the row is in the original data and 1 if it was added. ```{r echo=TRUE} results <- fastDummies::dummy_rows(fastDummies_example, dummy_indicator = TRUE) knitr::kable(results) ``` By default, columns not used for making the combinations are given a value of NA in the new rows. You can choose the value given with the parameter *dummy_value*. It takes an input, a string or single number. ```{r echo=TRUE} results1 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = 0) results2 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = "new value") knitr::kable(results1) knitr::kable(results2) ``` The parameter *select_columns* lets you choose which columns to use when making the combinations. It accepts a string or vector of column names. This can come in handy when you want to include a numeric column, such as years, when making the combinations. A new data set will help demonstrate this. This data set shows (imaginary) crime in New York City and San Francisco during 1990 and 2000. The problem is that there is no row for New York City for 2000. We want to add that row. ```{r echo = TRUE} crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) knitr::kable(crime) ``` Using the default parameters for `dummy_rows()` doesn't give us what we want since it only selects the city column. We need to select both city and year to get all the combinations we want. 
```{r echo=TRUE}
results <- fastDummies::dummy_rows(crime, select_columns = c("city", "year"))
knitr::kable(results)
```
fastDummies/R/0000755000176200001440000000000014444355312012741 5ustar liggesusersfastDummies/R/utils.R0000644000176200001440000000174514451270541014231 0ustar liggesusers
# Labels the tabular class `.data` arrived as, so the caller can convert its
# result back to that class. data.table is tested first, then tibble; anything
# else is labelled a plain data.frame.
check_type <- function(.data) {
  if (data.table::is.data.table(.data)) {
    return("is_data_table")
  }
  if (tibble::is_tibble(.data)) {
    return("is_tibble")
  }
  "is_data_frame"
}

# Converts `.data` to the class named by `data_type` (a label produced by
# check_type()). Labels other than "is_data_frame" and "is_tibble" return
# `.data` unchanged.
fix_data_type <- function(.data, data_type) {
  switch(data_type,
    is_data_frame = as.data.frame(.data, stringsAsFactors = FALSE),
    is_tibble = tibble::as_tibble(.data),
    .data
  )
}

# Attach hook: emits the thank-you note and the citation request, one
# startup message per line so users can silence them with
# suppressPackageStartupMessages().
.onAttach <- function(libname, pkgname) {
  startup_lines <- c(
    "Thank you for using fastDummies!",
    "To acknowledge our work, please cite the package:",
    "Kaplan, J. & Schlegel, B. (2023). fastDummies: Fast Creation of Dummy (Binary) Columns and Rows from Categorical Variables. Version 1.7.1. URL: https://github.com/jacobkap/fastDummies, https://jacobkap.github.io/fastDummies/."
  )
  for (startup_line in startup_lines) {
    packageStartupMessage(startup_line)
  }
}
fastDummies/R/dummy_cols.R0000644000176200001440000002530214444355312015241 0ustar liggesusers
#' Fast creation of dummy variables
#'
#' Quickly create dummy (binary) columns from character and
#' factor type columns in the inputted data (and numeric columns if specified.)
#' This function is useful for statistical analysis when you want binary
#' columns rather than character columns.
#'
#' @family dummy functions
#' @seealso \code{\link{dummy_rows}} For creating dummy rows
#'
#' @param .data
#' An object with the data set you want to make dummy columns from.
#' @param select_columns
#' Vector of column names that you want to create dummy variables from.
#' If NULL (default), uses all character and factor columns.
#' @param remove_first_dummy #' Removes the first dummy of every variable such that only n-1 dummies remain. #' This avoids multicollinearity issues in models. #' @param remove_most_frequent_dummy #' Removes the most frequently observed category such that only n-1 dummies #' remain. If there is a tie for most frequent, will remove the first #' (by alphabetical order) category that is tied for most frequent. #' @param ignore_na #' If TRUE, ignores any NA values in the column. If FALSE (default), then it #' will make a dummy column for value_NA and give a 1 in any row which has a #' NA value. #' @param split #' A string to split a column when multiple categories are in the cell. For #' example, if a variable is Pets and the rows are "cat", "dog", and "turtle", #' each of these pets would become its own dummy column. If one row is "cat, dog", #' then a split value of "," this row would have a value of 1 for both the cat #' and dog dummy columns. #' @param remove_selected_columns #' If TRUE (not default), removes the columns used to generate the dummy columns. #' @param omit_colname_prefix #' If TRUE (not default) and `length(select_columns) == 1`, omit pre-pending the #' name of `select_columns` to the names of the newly generated dummy columns #' #' @return #' A data.frame (or tibble or data.table, depending on input data type) with #' same number of rows as inputted data and original columns plus the newly #' created dummy columns. 
#' @export
#' @examples
#' crime <- data.frame(city = c("SF", "SF", "NYC"),
#'     year = c(1990, 2000, 1990),
#'     crime = 1:3)
#' dummy_cols(crime)
#' # Include year column
#' dummy_cols(crime, select_columns = c("city", "year"))
#' # Remove first dummy for each pair of dummy columns made
#' dummy_cols(crime, select_columns = c("city", "year"),
#'     remove_first_dummy = TRUE)
dummy_cols <- function(.data,
                       select_columns = NULL,
                       remove_first_dummy = FALSE,
                       remove_most_frequent_dummy = FALSE,
                       ignore_na = FALSE,
                       split = NULL,
                       remove_selected_columns = FALSE,
                       omit_colname_prefix = FALSE) {

  stopifnot(is.null(select_columns) || is.character(select_columns),
            select_columns != "",
            is.logical(remove_first_dummy), length(remove_first_dummy) == 1,
            is.logical(remove_selected_columns))

  # Both flags are scalars, so use the short-circuit scalar operator.
  if (isTRUE(remove_first_dummy) && isTRUE(remove_most_frequent_dummy)) {
    stop("Select either 'remove_first_dummy' or 'remove_most_frequent_dummy' to proceed.")
  }

  # A bare vector is wrapped in a one-column data.frame named ".data".
  if (is.vector(.data)) {
    .data <- data.frame(.data = .data, stringsAsFactors = FALSE)
  }

  # Remember the input class so the result can be converted back at the end.
  data_type <- check_type(.data)

  if (!data.table::is.data.table(.data)) {
    .data <- data.table::as.data.table(.data)
  }

  # Grabs column names that are character or factor class -------------------
  if (!is.null(select_columns)) {
    in_data <- select_columns %in% names(.data)
    cols_not_in_data <- select_columns[!in_data]
    char_cols <- select_columns[in_data]
    if (length(char_cols) == 0) {
      stop("select_columns is/are not in data. Please check data and spelling.")
    }
  } else if (ncol(.data) == 1) {
    char_cols <- names(.data)
  } else {
    # inherits() looks at the whole class vector, so columns with more than
    # one class (e.g. ordered factors, c("ordered", "factor")) are detected;
    # comparing sapply(.data, class) against the two names skipped them.
    is_categorical <- vapply(.data, inherits, logical(1),
                             what = c("factor", "character"))
    char_cols <- names(.data)[is_categorical]
  }

  if (length(char_cols) == 0 && is.null(select_columns)) {
    stop(paste0("No character or factor columns found. ",
                "Please use select_columns to choose columns."))
  }

  if (!is.null(select_columns) && length(cols_not_in_data) > 0) {
    # cols_not_in_data is an unnamed character vector: print its values
    # (names() of it is NULL, which left the warning without any column).
    warning(paste0("NOTE: The following select_columns input(s) ",
                   "is not a column in data.\n"),
            paste0(cols_not_in_data, "\t"))
  }

  for (col_name in char_cols) {
    # The source column is never modified below, so its character form and
    # NA positions are computed once per column.
    col_data <- .data[[col_name]]
    col_values <- as.character(col_data)
    na_rows <- which(is.na(col_data))
    non_na_rows <- which(!is.na(col_data))

    # If factor type, order by assigned levels
    if (is.factor(col_data)) {
      unique_vals <- levels(col_data)
      if (any(is.na(col_data))) {
        unique_vals <- c(unique_vals, NA)
      }
      # Else by alphabetical order.
    } else {
      unique_vals <- unique(col_data)
      unique_vals <- stringr::str_sort(unique_vals, na_last = TRUE,
                                       locale = "en_US", numeric = TRUE)
    }

    unique_vals <- as.character(unique_vals)

    # If there is a split value, splits up the unique_vals by that value
    # and keeps only the unique ones.
    if (!is.null(split)) {
      unique_vals <- unique(trimws(unlist(strsplit(unique_vals, split = split))))
    }

    if (ignore_na) {
      unique_vals <- unique_vals[!is.na(unique_vals)]
    }

    if (remove_most_frequent_dummy) {
      # NOTE(review): this branch rebuilds unique_vals from table(), which
      # drops NA and does not apply `split` or factor-level order - confirm
      # that is intended.
      vals <- data.frame(sort(table(vals = col_values), decreasing = TRUE),
                         stringsAsFactors = FALSE)

      # If there is a actual most frequent value, drop that value. Else,
      # if there is a tie, drop the one that's first alphabetically.
      top_vals <- vals[vals$Freq %in% max(vals$Freq), ]
      other_vals <- vals$vals[!vals$Freq %in% max(vals$Freq)]
      other_vals <- as.character(other_vals)
      top_vals <- top_vals[stringr::str_order(top_vals$vals,
                                              na_last = TRUE,
                                              locale = "en_US",
                                              numeric = TRUE), ]
      if (nrow(top_vals) == 1) {
        top_vals <- NULL
      } else {
        top_vals <- as.character(top_vals$vals[2:nrow(top_vals)])
      }

      unique_vals <- c(top_vals, other_vals)
      unique_vals <- stringr::str_sort(unique_vals, na_last = TRUE,
                                       locale = "en_US", numeric = TRUE)
    }

    if (remove_first_dummy) {
      unique_vals <- unique_vals[-1]
    }

    # Create every dummy column for this variable, initialised to 0.
    data.table::alloc.col(.data, ncol(.data) + length(unique_vals))
    .data[, paste0(col_name, "_", unique_vals)] <- 0L

    # With `split`, a cell may hold several categories. Split each cell once
    # and keep the trimmed k-th piece of every row (NA where a row has fewer
    # than k pieces) instead of re-splitting for every category.
    if (!is.null(split)) {
      split_pieces <- strsplit(col_values, split = split)
      # max(0L, ...) keeps zero-row input from producing -Inf.
      max_split_length <- max(0L, lengths(split_pieces))
      split_positions <- lapply(seq_len(max_split_length), function(k) {
        trimws(vapply(split_pieces, `[`, character(1), k))
      })
    }

    for (unique_value in unique_vals) {
      dummy_name <- paste0(col_name, "_", unique_value)

      data.table::set(.data,
                      i = which(data.table::chmatch(col_values, unique_value,
                                                    nomatch = 0) == 1L),
                      j = dummy_name, value = 1L)

      # Sets NA values to NA, only for columns that are not the NA columns
      if (!is.na(unique_value)) {
        data.table::set(.data, i = na_rows, j = dummy_name, value = NA)
      }

      if (!is.null(split)) {
        for (piece in split_positions) {
          data.table::set(.data,
                          i = which(data.table::chmatch(piece, unique_value,
                                                        nomatch = 0) == 1L),
                          j = dummy_name, value = 1L)
        }
        # Padding NAs from short rows match the NA category above; reset the
        # NA dummy to 0 wherever the source cell is not actually NA.
        if (is.na(unique_value)) {
          .data[[dummy_name]][non_na_rows] <- 0
        }
      }
    }
  }

  if (remove_selected_columns) {
    # NOTE(review): a single-index `[` drops columns only under
    # data.frame-style dispatch - confirm this package is not registered as
    # data.table-aware before changing the imports.
    .data <- .data[-which(names(.data) %in% char_cols)]
  }

  .data <- fix_data_type(.data, data_type)

  if (omit_colname_prefix) {
    if (length(select_columns) == 1) {
      # Rename only the columns generated above, matched by exact name, and
      # strip just the leading "<column>_" prefix. Regex substring matching
      # could hit unrelated columns or misread metacharacters in the name.
      dummy_names <- if (length(unique_vals) > 0) {
        paste0(select_columns, "_", unique_vals)
      } else {
        character(0)
      }
      new_col_index <- names(.data) %in% dummy_names
      names(.data)[new_col_index] <- substring(names(.data)[new_col_index],
                                               nchar(select_columns) + 2L)
    } else {
      message("Can't omit the colname prefix when recoding more than one column.")
      message("Returning prefixed dummy columns.")
    }
  }

  .data
}

#' Fast creation of dummy variables
#'
#' dummy_columns() quickly creates dummy (binary) columns from character and
#' factor type columns in the inputted data. This function is useful for
#' statistical analysis when you want binary columns rather than
#' character columns.
#'
#' @family dummy functions
#' @seealso \code{\link{dummy_rows}} For creating dummy rows
#'
#'
#' @inheritParams dummy_cols
#' @export
#' @examples
#' crime <- data.frame(city = c("SF", "SF", "NYC"),
#'     year = c(1990, 2000, 1990),
#'     crime = 1:3)
#' dummy_cols(crime)
#' # Include year column
#' dummy_cols(crime, select_columns = c("city", "year"))
#' # Remove first dummy for each pair of dummy columns made
#' dummy_cols(crime, select_columns = c("city", "year"),
#'     remove_first_dummy = TRUE)
dummy_columns <- dummy_cols
fastDummies/R/dummy_rows.R0000644000176200001440000001166413464411245015300 0ustar liggesusers
#' Fast creation of dummy rows
#'
#' dummy_rows() quickly creates dummy rows to fill in missing rows
#' based on all combinations of available character, factor, and
#' date columns (if not otherwise specified).
#' This is useful for
#' creating balanced panel data. Columns that are not character,
#' factor, or dates are filled in with NA (or whatever value you
#' specify).
#'
#' @family dummy functions
#' @seealso \code{\link{dummy_cols}} For creating dummy columns
#'
#' @param .data
#' An object with the data set you want to make dummy rows from.
#' @param select_columns
#' If NULL (default), uses all character, factor, and Date columns to produce categories
#' to make the dummy rows by. If not NULL, you manually enter a string or vector of strings of columns name(s).
#' @param dummy_value
#' Value of the row for columns that are not selected.
#' Default is a value of NA.
#' @param dummy_indicator
#' Adds binary column to say if row is dummy or not (i.e. included in
#' original data or not)
#'
#' @return
#' A data.frame (or tibble or data.table, depending on input data type) with
#' same number of columns as inputted data (plus \code{dummy_indicator} when
#' requested) and original rows plus the newly created dummy rows
#' @export
#' @examples
#' crime <- data.frame(city = c("SF", "SF", "NYC"),
#'     year = c(1990, 2000, 1990),
#'     crime = 1:3)
#'
#' dummy_rows(crime)
#' # Include year column
#' dummy_rows(crime, select_columns = c("city", "year"))
#' # Make dummy value 0
#' dummy_rows(crime, select_columns = c("city", "year"),
#'     dummy_value = 0)
#' # Add a dummy indicator
#' dummy_rows(crime, select_columns = c("city", "year"),
#'     dummy_indicator = TRUE)
dummy_rows <- function(.data,
                       select_columns = NULL,
                       dummy_value = NA,
                       dummy_indicator = FALSE) {

  stopifnot(is.null(select_columns) || is.character(select_columns),
            select_columns != "",
            is.logical(dummy_indicator), length(dummy_indicator) == 1,
            length(dummy_value) == 1)

  if (is.atomic(.data) || ncol(.data) == 1) {
    stop("Cannot make dummy rows of a vector of one column data.frame/table.")
  }

  # Remember the input class so the result can be converted back at the end.
  data_type <- check_type(.data)
  if (!data.table::is.data.table(.data)) {
    .data <- data.table::as.data.table(.data)
  }

  # Finds class of every column and keeps character, factor, and Date --------
  if (is.null(select_columns)) {
    # inherits() checks the whole class vector, so multi-class columns such
    # as ordered factors are picked up and the result is always one logical
    # per column (sapply(.data, class) can return a list or a matrix).
    is_category <- vapply(.data, inherits, logical(1),
                          what = c("character", "factor", "Date"))
    char_cols <- names(.data)[is_category]
    if (length(char_cols) == 0) {
      stop("No character, factor, or Date columns found. Please use select_columns")
    }
  } else {
    char_cols <- select_columns
    # Fail early with a readable message instead of an obscure subsetting
    # error further down.
    missing_cols <- setdiff(char_cols, names(.data))
    if (length(missing_cols) > 0) {
      stop("select_columns is/are not in data. Please check data and spelling: ",
           paste(missing_cols, collapse = ", "))
    }
  }

  other_cols <- names(.data)[!names(.data) %in% char_cols]

  # Finds how many possible combinations of the variables there are.
  # This will be the number of rows in the new data
  total_length <- prod(vapply(char_cols,
                              function(col) {
                                as.numeric(data.table::uniqueN(.data[[col]]))
                              },
                              numeric(1)))

  # Makes an empty data.table with right # of rows and columns. -------------
  temp_table <- data.table::data.table(matrix(nrow = total_length,
                                              ncol = ncol(.data)))
  names(temp_table) <- names(.data)

  # Fills in all possible combination rows ----------------------------------
  # Each column cycles through its unique values and the table is then
  # ordered by that column, so the next column's cycle lines up with every
  # value already placed.
  for (col in char_cols) {
    data.table::set(temp_table, j = col,
                    value = rep(unique(.data[[col]]),
                                times = total_length /
                                  data.table::uniqueN(.data[[col]])))
    temp_table <- data.table::setorderv(temp_table, col)
  }

  # Adds the dummy variable columns (and indicator) -------------------------
  # One assignment per unselected column.
  for (col in other_cols) {
    data.table::set(temp_table, j = col,
                    value = rep(dummy_value, nrow(temp_table)))
  }

  if (dummy_indicator) {
    # set() modifies by reference. When the caller passed a data.table,
    # `.data` is still the caller's object, so copy it first rather than add
    # a column to the caller's table.
    if (data_type == "is_data_table") {
      .data <- data.table::copy(.data)
    }
    # Adding extra column
    data.table::alloc.col(temp_table, ncol(temp_table) + 1)
    data.table::alloc.col(.data, ncol(.data) + 1)
    data.table::set(.data, j = "dummy_indicator", value = 0L)
    data.table::set(temp_table, j = "dummy_indicator",
                    value = rep(1L, nrow(temp_table)))
  }

  # Removes rows that were in original data. --------------------------------
  # Keys join the selected columns with a separator that should not occur in
  # data; without one, ("ab", "c") and ("a", "bc") both become "abc" and a
  # missing combination could be treated as already present.
  combination_keys <- function(tbl) {
    key_parts <- unname(lapply(char_cols, function(col) tbl[[col]]))
    do.call(paste, c(key_parts, list(sep = "\037")))
  }
  data_temp_pasting <- combination_keys(.data)
  temp_temp_pasting <- combination_keys(temp_table)
  temp_table <- subset(temp_table, !temp_temp_pasting %in% data_temp_pasting)

  # Stacks new data on old data
  if (nrow(temp_table) > 0) {
    .data <- data.table::rbindlist(list(.data, temp_table), use.names = TRUE,
                                   fill = TRUE)
  }

  .data <- fix_data_type(.data, data_type)
  .data
}
fastDummies/NEWS.md0000644000176200001440000000540014451271154013634 0ustar liggesusers
# fastDummies 1.7.3

* Fix .onAttach message.

# fastDummies 1.7.2

* Adds onattach message.

# fastDummies 1.7.0

* Add option to omit colname prefix when only one column is recoded. Thanks to @teofiln for the PR.

# fastDummies 1.6.3

* Fix bug where inputting a vector or a one column data.frame returned an issue. Now will convert the vector to a data.frame and return that. Will name each column ".data_". Closes #23 by @Garyf20.
* Fix bug where `remove_most_frequent_dummy` wasn't working right when there was a tie for which value was the most frequent. Closes #22 by eden70.

# fastDummies 1.6.2

* Dummy columns are now returned in alphabetical order including numeric order (e.g. photos_2 is before photos_11).

# fastDummies 1.6.1

* Bug fixes.
* Dummy columns are now returned in alphabetical order.

# fastDummies 1.6.0

* Adds the parameter `remove_selected_columns` to `dummy_columns()`. If TRUE (not default), removes the columns which are used to create the dummy columns.

# fastDummies 1.5.0

* Removes `sort_columns` parameter. Now by default will order by level if the variable is a factor type.
* Fix bug where `split` parameter didn't work properly.
* If value is NA, sets to NA in dummy column rather than value of 0. Closes #18 by @DLustenBerger.

# fastDummies 1.4.1

* Fix bug when column is factor type when using `split` parameter. Thanks to Matthew Sigal for submitting issue on GitHub.
# fastDummies 1.4.0 * Adds option to ignore NA values in dummy_cols (doesn't make a variable_NA column when selected). Thanks to juribep5 for the GitHub suggestion. * Adds `split` parameter in dummy_cols to handle if a row has multiple categories. Thanks to Matthew Sigal and Andrew Fernandes for the GitHub suggestion. # fastDummies 1.3.0 * Adds option to sort dummy columns following the order of the original factor variable. Thanks to Patrick Baylis for the pull request with the code for this feature! # fastDummies 1.2.0 * Adds option to exclude the most frequently observed category rather than the first category as is default. Thanks to GitHub user S-UP for the suggestion! # fastDummies 1.1.0 * Thanks to GitHub user yu45020 dummy_cols() is now about >20% faster and much more memory efficient. * Both dummy_cols() and dummy_rows() now return the same data type inputted e.g. data.frame input returns data.frame, tibble returns tibble. * Fix documentation that incorrectly said default value for new dummy rows is 0. It is in fact a value of NA. # fastDummies 1.0.0 * Reduces number of parameters that were in previous version. * Significant speed increases for both dummy_cols() and dummy_rows() functions. * dummy_cols() now accepts numeric columns. 
fastDummies/MD50000644000176200001440000000447414451543216013061 0ustar liggesusersdfc6fd21507f6894d42ade414e3a37c7 *DESCRIPTION 731c092528258cd0a1d1af148b130fcc *LICENSE c3b8266565c53588af78f606e2f6c1a1 *NAMESPACE de44e0bbc0407f8ecb6adf74ec8df602 *NEWS.md 64c32d83089d2da3f3d114fe4cc500ea *R/dummy_cols.R 2564d426bfc94012c3606cbbadb7483f *R/dummy_rows.R e27d8668c6d4e0021bd8400d81f8e949 *R/utils.R 2983a8b2e39dcb0d577b5d049105e9a5 *build/vignette.rds 3a5798b82ecdee3e46b14bdcdb16e394 *inst/WORDLIST 65b93c57978617cb3eb27b24c025209c *inst/doc/making-dummy-rows.R 4127b091a7465de103eb985fbf8cc9c1 *inst/doc/making-dummy-rows.Rmd 15b552013dfe99c1b7212949b0484a24 *inst/doc/making-dummy-rows.html 86fcbcdfaade0cc21c020d749e72a5d9 *inst/doc/making-dummy-variables.R 436801a7e4f2ccbedfe4d549ffd92953 *inst/doc/making-dummy-variables.Rmd 1f0af5b60962a59581b0f616a186065c *inst/doc/making-dummy-variables.html c892a894a1799ef27acc7c68cb7524c0 *inst/testdata/fastDummies_data.rda 7fcf4ee915bc796243a577484e427f7a *inst/testdata/makes_test_data.R 6725acf83f3c4cbba167ea88ed9135aa *man/dummy_cols.Rd c956af5d6d0ac0143e81ce748e8c8f9c *man/dummy_columns.Rd 7aa6228ca30619a91d4f58d35144a6d9 *man/dummy_rows.Rd 332173f20d20942c02019350ea4dcadc *tests/spelling.R d3109c4c8489c587f56bd1073b0fd985 *tests/testthat.R 112317ef16c3018fc45ef0758a6bf32a *tests/testthat/test-columns-dimensions.R 0b5f9c1a3d23180271616aebd958be3c *tests/testthat/test-columns-split.R dc77896c943767aa2239caae004998c7 *tests/testthat/test-columns-type.R 1b276bfac55e88ec146fbc73b7354f56 *tests/testthat/test-columns-value-order.R be51abdf8c4f52bae204d00f1b80b982 *tests/testthat/test-columns-warnings-errors.R 86ffa604a690776c460059ed04f5e272 *tests/testthat/test-columns.R b00ba7cd33b5da5b37691ce5a88ee4fe *tests/testthat/test-ignore_na.R f2cc65a610485f44d3d1beab40b929a7 *tests/testthat/test-omit-colname-prefix.R 5d92bb1b05a42673a36704a22cd053d4 *tests/testthat/test-return-type.R a81d22fe189de37ede8af3324172dbbe 
*tests/testthat/test-rows-dimensions.R fa8dea1dd629feb4cf7135ebb8a42de5 *tests/testthat/test-rows-indicator-values.R 0a5982a4ede02c780ce8c7a321fd993b *tests/testthat/test-rows-right-values.R a1ab5ae1ae0e4420364c82cb32d1f896 *tests/testthat/test-rows-type.R 1a155f7f426ef6b9ec024199120df181 *tests/testthat/test-rows-warnings-errors.R 4127b091a7465de103eb985fbf8cc9c1 *vignettes/making-dummy-rows.Rmd 436801a7e4f2ccbedfe4d549ffd92953 *vignettes/making-dummy-variables.Rmd fastDummies/inst/0000755000176200001440000000000014451271273013516 5ustar liggesusersfastDummies/inst/doc/0000755000176200001440000000000014451271273014263 5ustar liggesusersfastDummies/inst/doc/making-dummy-rows.R0000644000176200001440000000276714451271272020010 0ustar liggesusers## ----echo=TRUE---------------------------------------------------------------- fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01")), stringsAsFactors = FALSE) knitr::kable(fastDummies_example) ## ----echo=TRUE---------------------------------------------------------------- results <- fastDummies::dummy_rows(fastDummies_example) knitr::kable(results) ## ----echo=TRUE---------------------------------------------------------------- results <- fastDummies::dummy_rows(fastDummies_example, dummy_indicator = TRUE) knitr::kable(results) ## ----echo=TRUE---------------------------------------------------------------- results1 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = 0) results2 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = "new value") knitr::kable(results1) knitr::kable(results2) ## ----echo = TRUE-------------------------------------------------------------- crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) knitr::kable(crime) ## ----echo=TRUE---------------------------------------------------------------- results <- 
fastDummies::dummy_rows(crime, select_columns = c("city", "year")) knitr::kable(results) fastDummies/inst/doc/making-dummy-variables.Rmd0000644000176200001440000001003613223763615021276 0ustar liggesusers--- title: "Making dummy variables with dummy_cols()" author: "Jacob Kaplan" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Making dummy variables with dummy_cols()} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- Dummy variables (or binary variables) are commonly used in statistical analyses and in more simple descriptive statistics. A dummy column is one which has a value of one when a categorical event occurs and a zero when it doesn't occur. In most cases this is a feature of the event/person/object being described. For example, if the dummy variable was for occupation being an R programmer, you can ask, "is this person an R programmer?" When the answer is yes, they get a value of 1, when it is no, they get a value of 0. We'll start with a simple example and then go into using the function `dummy_cols()`. You can also use the function `dummy_columns()` which is identical to `dummy_cols()`. Imagine you have a data set about animals in a local shelter. One of the columns in your data is what animal it is: dog or cat. ```{r echo=FALSE} knitr::kable(data.frame(animals = c("dog", "dog", "cat"))) ``` To make dummy columns from this data, you would need to produce two new columns. One would indicate if the animal is a dog, and the other would indicate if the animal is a cat. Each row would get a value of 1 in the column indicating which animal they are, and 0 in the other column. animals | dog | cat --- | --- | --- dog | 1 | 0 dog | 1 | 0 cat | 0 | 1 In the function dummy_cols, the names of these new columns are concatenated to the original column and separated by an underscore. 
animals | animals_dog | animals_cat --- | --- | --- dog | 1 | 0 dog | 1 | 0 cat | 0 | 1 With an example like this, it is fairly easy to make the dummy columns yourself. `dummy_cols()` automates the process, and is useful when you have many columns to generate dummy variables from or many categories within a column. ```{r setup, echo=TRUE} fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01")), stringsAsFactors = FALSE) knitr::kable(fastDummies_example) ``` The object **fastDummies_example** has two character type columns, one integer column, and a Date column. By default, `dummy_cols()` will make dummy variables from factor or character columns only. This is because in most cases those are the only types of data you want dummy variables from. If those are the only columns you want, then the function takes your data set as the first parameter and returns a data.frame with the newly created variables appended to the end of the original data. ```{r echo=TRUE} results <- fastDummies::dummy_cols(fastDummies_example) knitr::kable(results) ``` In some situations, you would want columns with types other than factor and character to generate dummy variables. For example, a column of years would be numeric but could be well-suited for making into dummy variables depending on your analysis. Use the *select_columns* parameter to select specific columns to make dummy variables from. ```{r echo=TRUE} results <- fastDummies::dummy_cols(fastDummies_example, select_columns = "numbers") knitr::kable(results) ``` The final option for `dummy_cols()` is *remove_first_dummy* which by default is FALSE. If TRUE, it removes the first dummy variable created from each column. This is done to avoid multicollinearity in a multiple regression model caused by including all dummy variables. The "first" dummy variable is the one at the top of the rows (i.e. 
the first value that is not NA). ```{r echo=TRUE} results <- fastDummies::dummy_cols(fastDummies_example, remove_first_dummy = TRUE) knitr::kable(results) ``` fastDummies/inst/doc/making-dummy-rows.Rmd0000644000176200001440000000722313223763627020327 0ustar liggesusers--- title: "Making dummy rows with dummy_rows()" author: "Jacob Kaplan" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Making dummy rows with dummy_rows()} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- When dealing with data, there are often missing rows. While truly handling missing data is far beyond the scope of this package, the function `dummy_rows()` lets you add those missing rows back into the data. The function takes all character, factor, and Date columns, finds all possible combinations of their values, and adds the rows that are not in the original data set. Any columns not used in creating the combinations (e.g. numeric) are given a value of NA (unless otherwise specified with *dummy_value*). Lets start with a simple example. ```{r echo=TRUE} fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01")), stringsAsFactors = FALSE) knitr::kable(fastDummies_example) ``` This data set has four columns: two character, one Date, and one numeric. The function by default will use the character and Date columns in creating the combinations. First, a small amount of math to explain the combinations. Each column has two distinct values - gender: male & female; animals: dog & cat; dates: 2011-12-31 & 2011-12-31. To find the number of possible combinations, multiple the number of unique values in each column together. 2 \* 2 \* 2 = 8. 
```{r echo=TRUE} results <- fastDummies::dummy_rows(fastDummies_example) knitr::kable(results) ``` When we run the function we can see that there are indeed 8 rows possible, and that the 5 rows missing from the original data have been added. To explicitly see which rows are new, set the *dummy_indicator* parameter to TRUE. This provides a column called dummy_indicator with a value of 0 if the row is in the original data and 1 if it was added. ```{r echo=TRUE} results <- fastDummies::dummy_rows(fastDummies_example, dummy_indicator = TRUE) knitr::kable(results) ``` By default, columns not used for making the combinations are given a value of NA in the new rows. You can choose the value given with the parameter *dummy_value*. It takes an input, a string or single number. ```{r echo=TRUE} results1 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = 0) results2 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = "new value") knitr::kable(results1) knitr::kable(results2) ``` The parameter *select_columns* lets you choose which columns to use when making the combinations. It accepts a string or vector of column names. This can come in handy when you want to include a numeric column, such as years, when making the combinations. A new data set will help demonstrate this. This data set shows (imaginary) crime in New York City and San Francisco during 1990 and 2000. The problem is that there is no row for New York City for 2000. We want to add that row. ```{r echo = TRUE} crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) knitr::kable(crime) ``` Using the default parameters for `dummy_rows()` doesn't give us what we want since it only selects the city column. We need to select both city and year to get all the combinations we want. 
```{r echo=TRUE} results <- fastDummies::dummy_rows(crime, select_columns = c("city", "year")) knitr::kable(results) ``` fastDummies/inst/doc/making-dummy-rows.html0000644000176200001440000005627314451271273020555 0ustar liggesusers Making dummy rows with dummy_rows()

Making dummy rows with dummy_rows()

Jacob Kaplan

2023-07-05

When dealing with data, there are often missing rows. While truly handling missing data is far beyond the scope of this package, the function dummy_rows() lets you add those missing rows back into the data.

The function takes all character, factor, and Date columns, finds all possible combinations of their values, and adds the rows that are not in the original data set. Any columns not used in creating the combinations (e.g. numeric) are given a value of NA (unless otherwise specified with dummy_value).

Let's start with a simple example.

fastDummies_example <- data.frame(numbers = 1:3,
                    gender  = c("male", "male", "female"),
                    animals = c("dog", "dog", "cat"),
                    dates   = as.Date(c("2012-01-01", "2011-12-31",
                                          "2012-01-01")),
                    stringsAsFactors = FALSE)
knitr::kable(fastDummies_example)
numbers gender animals dates
1 male dog 2012-01-01
2 male dog 2011-12-31
3 female cat 2012-01-01

This data set has four columns: two character, one Date, and one numeric. The function by default will use the character and Date columns in creating the combinations. First, a small amount of math to explain the combinations. Each column has two distinct values - gender: male & female; animals: dog & cat; dates: 2011-12-31 & 2012-01-01. To find the number of possible combinations, multiply the number of unique values in each column together. 2 * 2 * 2 = 8.

results <- fastDummies::dummy_rows(fastDummies_example)
knitr::kable(results)
numbers gender animals dates
1 male dog 2012-01-01
2 male dog 2011-12-31
3 female cat 2012-01-01
NA female cat 2011-12-31
NA male cat 2011-12-31
NA female dog 2011-12-31
NA male cat 2012-01-01
NA female dog 2012-01-01

When we run the function we can see that there are indeed 8 rows possible, and that the 5 rows missing from the original data have been added.

To explicitly see which rows are new, set the dummy_indicator parameter to TRUE. This provides a column called dummy_indicator with a value of 0 if the row is in the original data and 1 if it was added.

results <- fastDummies::dummy_rows(fastDummies_example, dummy_indicator = TRUE)
knitr::kable(results)
numbers gender animals dates dummy_indicator
1 male dog 2012-01-01 0
2 male dog 2011-12-31 0
3 female cat 2012-01-01 0
NA female cat 2011-12-31 1
NA male cat 2011-12-31 1
NA female dog 2011-12-31 1
NA male cat 2012-01-01 1
NA female dog 2012-01-01 1

By default, columns not used for making the combinations are given a value of NA in the new rows. You can choose the value given with the parameter dummy_value. It takes a single value, either a string or a number.

results1 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = 0)
results2 <- fastDummies::dummy_rows(fastDummies_example, dummy_value = "new value")
knitr::kable(results1)
numbers gender animals dates
1 male dog 2012-01-01
2 male dog 2011-12-31
3 female cat 2012-01-01
0 female cat 2011-12-31
0 male cat 2011-12-31
0 female dog 2011-12-31
0 male cat 2012-01-01
0 female dog 2012-01-01
knitr::kable(results2)
numbers gender animals dates
1 male dog 2012-01-01
2 male dog 2011-12-31
3 female cat 2012-01-01
new value female cat 2011-12-31
new value male cat 2011-12-31
new value female dog 2011-12-31
new value male cat 2012-01-01
new value female dog 2012-01-01

The parameter select_columns lets you choose which columns to use when making the combinations. It accepts a string or vector of column names. This can come in handy when you want to include a numeric column, such as years, when making the combinations. A new data set will help demonstrate this. This data set shows (imaginary) crime in New York City and San Francisco during 1990 and 2000. The problem is that there is no row for New York City for 2000. We want to add that row.

crime <- data.frame(city = c("SF", "SF", "NYC"),
                    year = c(1990, 2000, 1990),
                    crime = 1:3)
knitr::kable(crime)
city year crime
SF 1990 1
SF 2000 2
NYC 1990 3

Using the default parameters for dummy_rows() doesn’t give us what we want since it only selects the city column. We need to select both city and year to get all the combinations we want.

results <- fastDummies::dummy_rows(crime, select_columns = c("city", "year"))
knitr::kable(results)
city year crime
SF 1990 1
SF 2000 2
NYC 1990 3
NYC 2000 NA
fastDummies/inst/doc/making-dummy-variables.html0000644000176200001440000004611314451271273021523 0ustar liggesusers Making dummy variables with dummy_cols()

Making dummy variables with dummy_cols()

Jacob Kaplan

2023-07-05

Dummy variables (or binary variables) are commonly used in statistical analyses and in more simple descriptive statistics. A dummy column is one which has a value of one when a categorical event occurs and a zero when it doesn’t occur. In most cases this is a feature of the event/person/object being described. For example, if the dummy variable was for occupation being an R programmer, you can ask, “is this person an R programmer?” When the answer is yes, they get a value of 1, when it is no, they get a value of 0.

We’ll start with a simple example and then go into using the function dummy_cols(). You can also use the function dummy_columns() which is identical to dummy_cols().

Imagine you have a data set about animals in a local shelter. One of the columns in your data is what animal it is: dog or cat.

animals
dog
dog
cat

To make dummy columns from this data, you would need to produce two new columns. One would indicate if the animal is a dog, and the other would indicate if the animal is a cat. Each row would get a value of 1 in the column indicating which animal they are, and 0 in the other column.

animals dog cat
dog 1 0
dog 1 0
cat 0 1

In the function dummy_cols, the name of each new column is the name of the original column followed by the category value, separated by an underscore.

animals animals_dog animals_cat
dog 1 0
dog 1 0
cat 0 1

With an example like this, it is fairly easy to make the dummy columns yourself. dummy_cols() automates the process, and is useful when you have many columns to generate dummy variables from or many categories within a column.

fastDummies_example <- data.frame(numbers = 1:3,
                    gender  = c("male", "male", "female"),
                    animals = c("dog", "dog", "cat"),
                    dates   = as.Date(c("2012-01-01", "2011-12-31",
                                          "2012-01-01")),
                    stringsAsFactors = FALSE)
knitr::kable(fastDummies_example)
numbers gender animals dates
1 male dog 2012-01-01
2 male dog 2011-12-31
3 female cat 2012-01-01

The object fastDummies_example has two character type columns, one integer column, and a Date column. By default, dummy_cols() will make dummy variables from factor or character columns only. This is because in most cases those are the only types of data you want dummy variables from. If those are the only columns you want, then the function takes your data set as the first parameter and returns a data.frame with the newly created variables appended to the end of the original data.

results <- fastDummies::dummy_cols(fastDummies_example)
knitr::kable(results)
numbers gender animals dates gender_female gender_male animals_cat animals_dog
1 male dog 2012-01-01 0 1 0 1
2 male dog 2011-12-31 0 1 0 1
3 female cat 2012-01-01 1 0 1 0

In some situations, you would want columns with types other than factor and character to generate dummy variables. For example, a column of years would be numeric but could be well-suited for making into dummy variables depending on your analysis. Use the select_columns parameter to select specific columns to make dummy variables from.

results <- fastDummies::dummy_cols(fastDummies_example, select_columns = "numbers")
knitr::kable(results)
numbers gender animals dates numbers_1 numbers_2 numbers_3
1 male dog 2012-01-01 1 0 0
2 male dog 2011-12-31 0 1 0
3 female cat 2012-01-01 0 0 1

The final option for dummy_cols() is remove_first_dummy which by default is FALSE. If TRUE, it removes the first dummy variable created from each column. This is done to avoid multicollinearity in a multiple regression model caused by including all dummy variables. The “first” dummy variable is the first dummy column created for each variable (in the example below, gender_female and animals_cat are removed), which is not necessarily the value in the top row of the data.

results <- fastDummies::dummy_cols(fastDummies_example, remove_first_dummy = TRUE)
knitr::kable(results)
numbers gender animals dates gender_male animals_dog
1 male dog 2012-01-01 1 1
2 male dog 2011-12-31 1 1
3 female cat 2012-01-01 0 0
fastDummies/inst/doc/making-dummy-variables.R0000644000176200001440000000217614451271273020761 0ustar liggesusers## ----echo=FALSE--------------------------------------------------------------- knitr::kable(data.frame(animals = c("dog", "dog", "cat"))) ## ----setup, echo=TRUE--------------------------------------------------------- fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01")), stringsAsFactors = FALSE) knitr::kable(fastDummies_example) ## ----echo=TRUE---------------------------------------------------------------- results <- fastDummies::dummy_cols(fastDummies_example) knitr::kable(results) ## ----echo=TRUE---------------------------------------------------------------- results <- fastDummies::dummy_cols(fastDummies_example, select_columns = "numbers") knitr::kable(results) ## ----echo=TRUE---------------------------------------------------------------- results <- fastDummies::dummy_cols(fastDummies_example, remove_first_dummy = TRUE) knitr::kable(results) fastDummies/inst/testdata/0000755000176200001440000000000014444350052015322 5ustar liggesusersfastDummies/inst/testdata/fastDummies_data.rda0000644000176200001440000000133513241724750021272 0ustar liggesusersBZh91AY&SYfW mM`@G J&`& iL`&`& $ښii4 =#FFR3='a @hdښu"rn7rir:,Xdo#ddenc c+vc:G7Tn g⪷B܆ؚۆÄR6Y`^zGНV)DauqA@*AS}ieēTIUXG2$Bn=?1ۥUf oI*I o*dX $)>@5n fsY  j5 øm33,qTT d`1TمȔ,(C0`F1 ? 
"clf;G6NFj;9a5=3l]X;gq[ֺ,=zK}[u+zsZ2bn]BCiI\fastDummies/inst/testdata/makes_test_data.R0000644000176200001440000000673513533222273020611 0ustar liggesusers# Makes data to be used for tests fastDummies_example <- data.frame(numbers = 1:3, gender = c("male", "male", "female"), animals = c("dog", "dog", "cat"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01"))) fastDummies_example_tibble <- tibble::as.tibble(fastDummies_example) fastDummies_example_DT <- data.table::as.data.table(fastDummies_example) fastDummies_full <- data.frame(numbers = c(1:3, rep(NA, 5)), gender = c("male", "male", "female", "female", "male", "female", "male", "female"), animals = c("dog", "dog", "cat", "cat", "cat", "dog", "cat", "dog"), dates = as.Date(c("2012-01-01", "2011-12-31", "2012-01-01", "2011-12-31", "2011-12-31", "2011-12-31", "2012-01-01", "2012-01-01"))) fastDummies_full_tibble <- tibble::as.tibble(fastDummies_full) fastDummies_full_DT <- data.table::as.data.table(fastDummies_full) crime <- data.frame(city = c("SF", "SF", "NYC"), year = c(1990, 2000, 1990), crime = 1:3) crime_tibble <- tibble::as.tibble(crime) crime_DT <- data.table::as.data.table(crime) crime_full <- data.frame(city = c("SF", "SF", "NYC", "NYC"), year = c(1990, 2000, 1990, 2000), crime = c(1:3, NA)) crime_full_tibble <- tibble::as.tibble(crime_full) crime_full_DT <- data.table::as.data.table(crime_full) no_dummies_needed <- data.frame(animals = c("lion", "lion", "tiger", "tiger"), food = c("bread", "cake", "bread", "cake")) no_dummies_needed_tibble <- tibble::as.tibble(no_dummies_needed) no_dummies_needed_DT <- data.table::as.data.table(no_dummies_needed) devtools::use_data(crime, crime_DT, crime_tibble, crime_full, crime_full_DT, crime_full_tibble, fastDummies_example, fastDummies_example_DT, fastDummies_example_tibble, fastDummies_full, fastDummies_full_DT, fastDummies_full_tibble, no_dummies_needed, no_dummies_needed_DT, no_dummies_needed_tibble, internal = TRUE) 
fastDummies/inst/WORDLIST0000644000176200001440000000005113417630134014700 0ustar liggesusersAppVeyor multicollinearity tibble yu