labelled/0000755000176200001440000000000014737436072012033 5ustar liggesuserslabelled/tests/0000755000176200001440000000000014736716451013176 5ustar liggesuserslabelled/tests/testthat/0000755000176200001440000000000014737436072015035 5ustar liggesuserslabelled/tests/testthat/test-copy_labels.r0000644000176200001440000000634714736716451020504 0ustar liggesuserstest_that("copy_labels() copy variable / value labels and missing values", { x <- labelled( c(1, 1, 2), labels = c(Male = 1, Female = 2), label = "Assigned sex at birth" ) y <- copy_labels(x, 1:3) expect_equal(var_label(x), var_label(y)) expect_equal(val_labels(x), val_labels(y)) expect_equal(na_range(x), na_range(y)) expect_equal(na_values(x), na_values(y)) x <- labelled_spss( 1:10, labels = c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, 19), label = "variable label" ) y <- 10:1 %>% copy_labels_from(x) expect_equal(var_label(x), var_label(y)) expect_equal(val_labels(x), val_labels(y)) expect_equal(na_range(x), na_range(y)) expect_equal(na_values(x), na_values(y)) x <- dplyr::tibble( a = labelled(c(1, 1, 2), c(yes = 1, no = 2)), b = labelled_spss(1:3, c(top = 1, bottom = 2), na_values = 3L), c = c("a", "b", "c") ) %>% set_variable_labels( a = "variable a", b = "variable B", c = "third variable" ) y <- dplyr::tibble( c = factor(c("a", "b", "c")), b = 2, a = 1:3, d = 9:7 ) %>% copy_labels_from(x) expect_equal(var_label(x$a), var_label(y$a)) expect_equal(val_labels(x$a), val_labels(y$a)) expect_equal(na_range(x$a), na_range(y$a)) expect_equal(na_values(x$a), na_values(y$a)) expect_equal(var_label(x$b), var_label(y$b)) expect_equal(val_labels(x$b), val_labels(y$b)) expect_equal(na_range(x$b), na_range(y$b)) expect_equal(na_values(x$b), na_values(y$b)) expect_equal(var_label(x$c), var_label(y$c)) }) test_that("if 'from' is not a labelled vector, copy only variable label", { # regardless of the class of 'to' x <- 1:10 var_label(x) <- "variable label" y <- 10:1 %>% copy_labels_from(x) expect_equal(var_label(x), var_label(y)) x <- factor(1:10) var_label(x) <- "variable label" y <- 10:1 %>% as.character() %>% copy_labels_from(x) expect_equal(var_label(x), var_label(y)) }) test_that("copy_labels checks", { # do not work with a list expect_error( copy_labels(list(1, 2), 1:2) ) expect_error( copy_labels(1:2, list(1, 2)) ) # if from is a data.frame, to should also be a data.frame expect_error( copy_labels(iris, 1:2) ) expect_error( copy_labels(1:2, iris) ) # if from is a labelled vector, to should have the same type x <- labelled( c(1, 1, 2), labels = c(Male = 1, Female = 2), label = "Assigned sex at birth" ) expect_error( copy_labels(x, c("1", "2")) ) x <- labelled_spss( 1:10, labels = c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, 19), label = "variable label" ) expect_error( copy_labels(x, c("1", "2")) ) # except if .strict = FALSE x <- labelled( c(1, 1, 2), labels = c(Male = 1, Female = 2), label = "Assigned sex at birth" ) expect_no_error( copy_labels(x, c("1", "2"), .strict = FALSE) ) x <- labelled_spss( 1:10, labels = c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, 19), label = "variable label" ) expect_no_error( copy_labels(x, c("1", "2"), .strict = FALSE) ) }) labelled/tests/testthat/test-labelled.r0000644000176200001440000010772714737244525017757 0ustar liggesusers# var_label -------------------------------------------------------------- test_that("var_label works properly", { x <- 1:3 var_label(x) <- "value" expect_equal(attr(x, "label"), "value") expect_equal(var_label(x), "value") var_label(x) <- NULL expect_null(attr(x, "label")) expect_null(var_label(x)) expect_no_error(var_label(x) <- NA_character_) x <- 1:3 x <- set_variable_labels(x, "value") expect_equal(attr(x, "label"), "value") x <- set_variable_labels(x, .labels = "other value") expect_equal(attr(x, "label"), "other value") x <- set_variable_labels(x, NULL) expect_null(attr(x, "label")) }) test_that("var_label works on data.frame", { df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = FALSE) var_label(df$x) <- "var x" expect_equal(var_label(df$x), "var x") expect_equal(var_label(df), list(x = "var x", y = NULL)) var_label(df) <- list(y = "YY", x = "XX") expect_equal(var_label(df), list(x = "XX", y = "YY")) var_label(df) <- NULL expect_equal(var_label(df), list(x = NULL, y = NULL)) var_label(df) <- c("var1", "var2") expect_equal(var_label(df), list(x = "var1", y = "var2")) df <- set_variable_labels(df, x = "XX", .labels = "other") expect_equal(var_label(df), list(x = "XX", y = "other")) df <- set_variable_labels(df, .labels = c("var1", "var2")) expect_equal(var_label(df), list(x = "var1", y = "var2")) }) test_that("var_label produce appropriate errors", { df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = FALSE) expect_error(var_label(df) <- c("var1", "var2", "var3")) expect_error(var_label(df) <- list(x = "xx", z = "zz")) expect_error( df %>% set_variable_labels(.labels = list(x = "xx", z = "zz")) ) expect_error( df %>% set_variable_labels(x = "ghj", z = "ggg") ) # no error if .strict = FALSE expect_no_error( df %>% set_variable_labels(.labels = list(x = "xx", z = "zz"), .strict = FALSE) ) expect_no_error( df %>% set_variable_labels(x = "ghj", z = "ggg", .strict = FALSE) ) }) test_that("var_label preserved data.frame type", { tb <- dplyr::tibble(x = 1:3, y = c("a", "b", "c")) before <- class(tb) var_label(tb$x) <- "var x" var_label(tb) <- list(y = "YY", x = "XX") after <- class(tb) expect_equal(before, after) }) # labelled -------------------------------------------------------------- test_that("labelled return an object of class haven_labelled", { x <- labelled(c(1, 2, 3), c(yes = 1, maybe = 2, no = 3)) expect_true(is.labelled(x)) expect_s3_class(x, "haven_labelled") }) test_that("x must be numeric or character", { expect_error(labelled(TRUE)) }) test_that("x and labels must be compatible", { expect_error(labelled(1, "a")) expect_no_error(labelled(1, c(female = 2L, male = 1L))) expect_no_error(labelled(1L, c(female = 2, male = 1))) }) test_that("labels must have names", { expect_error(labelled(1, 1)) }) # val_labels and val_label ------------------------------------------------ test_that("val_labels preserves variable label", { x <- 1:3 var_label(x) <- "test" val_labels(x) <- c(yes = 1, no = 2) expect_equal(attr(x, "label", exact = TRUE), "test") val_labels(x) <- NULL expect_equal(attr(x, "label", exact = TRUE), "test") }) test_that("val_label preserves variable label", { x <- 1:3 var_label(x) <- "test" val_label(x, 1) <- "yes" expect_equal(attr(x, "label", exact = TRUE), "test") val_label(x, 1) <- NULL expect_equal(attr(x, "label", exact = TRUE), "test") }) test_that("val_labels and val_label preserves spss missing values", { x <- labelled_spss( 1:10, c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) val_labels(x) <- c(yes = 1, no = 3) val_label(x, 2) <- "maybe" expect_true(inherits(x, "haven_labelled")) expect_true(inherits(x, "haven_labelled_spss")) expect_equal(attr(x, "na_values"), c(9, 10)) expect_equal(attr(x, "na_range"), c(11, Inf)) val_label(x, 2) <- "maybe" expect_true(inherits(x, "haven_labelled")) expect_true(inherits(x, "haven_labelled_spss")) expect_equal(attr(x, "na_values"), c(9, 10)) expect_equal(attr(x, "na_range"), c(11, Inf)) expect_equal(attr(x, "labels", exact = TRUE), c(yes = 1, no = 3, maybe = 2)) }) test_that("value labels can be removed if missing values are defined", { x <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) val_labels(x) <- NULL expect_null(val_labels(x)) x <- labelled_spss(1:10, c(Good = 1), na_range = c(9, 20)) val_labels(x) <- NULL expect_null(val_labels(x)) }) test_that("val_labels() null action", { x <- labelled(1:10, c(Good = 1, Bad = 8)) val_labels(x, null_action = "labelled") <- NULL expect_true(inherits(x, "haven_labelled")) val_labels(x) <- NULL expect_false(inherits(x, "haven_labelled")) }) test_that("value labels to NULL remove class if na_Values et na_range are NULL", { # nolint x <- labelled_spss(1:10, c(Good = 1, Bad = 8)) val_labels(x) <- NULL expect_null(val_labels(x)) expect_equal(match("labelled", names(attributes(x)), nomatch = 0), 0) }) test_that("error with non character argument", { x <- 1 expect_error(var_label(x) <- 1) }) test_that("error with mutilple character argument", { x <- 1 expect_error(var_label(x) <- c("a", "b")) }) test_that("test if unlist argument works properly", { df <- data.frame(col1 = 1:2, col2 = 3:4, stringsAsFactors = FALSE) expect_equal(var_label(df, unlist = TRUE), c(col1 = "", col2 = "")) var_label(df) <- c("lb1", "lb2") expect_equal(var_label(df, unlist = TRUE), c(col1 = "lb1", col2 = "lb2")) }) test_that("val_labels prefixed argument 100%", { v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) vlv <- val_labels(v) vlvp <- val_labels(v, prefixed = TRUE) noms_vlvp <- names(vlvp) pos <- regexpr("] ", noms_vlvp) noms_vlvp <- substring(noms_vlvp, pos + 2) names(vlvp) <- noms_vlvp expect_equal(vlv, vlvp) }) test_that("val_labels works for dataframe", { v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) y <- 1:10 df <- data.frame(v = v, y = y, stringsAsFactors = FALSE) res <- list(v = val_labels(v), y = NULL) expect_equal(val_labels(df), res) }) test_that(" 'val_labels <-' works for dataframe", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) num <- 1:10 ch <- letters[1:10] fac <- factor(paste0("f", 1:10)) df <- data.frame( xhs = xhs, num = num, ch = ch, fac = fac, stringsAsFactors = FALSE ) expect_error(val_labels(df) <- c(one = 1)) valeurs <- list( xhs = c(two = 2, five = 5), ch = c(leter_a = "a"), num = c(two = 2), fac = c(three = factor(2)) ) vldf <- df expect_error(val_labels(vldf) <- valeurs) valeurs <- list( xhs = c(two = 2, five = 5), ch = c(leter_a = "a"), num = c(two = 2) ) vldf <- df expect_no_error(val_labels(vldf) <- valeurs) expect_null(val_labels(vldf)$fac) expect_equal(df$fac, vldf$fac) noms <- c("xhs", "num", "ch") expect_equal(val_labels(vldf)[noms], valeurs[noms]) val_labels(df) <- NULL expect_true(all(sapply(val_labels(df), is.null))) }) test_that("val_label works for haven_labelled", { v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) expect_equal(val_label(v, 2), NULL) expect_equal(val_label(v, 1), "yes") expect_equal(val_label(v, 1, prefixed = TRUE), "[1] yes") expect_error(val_label(v, 1:2)) }) test_that("val_label works for default", { num <- 1:3 ch <- letters[1:3] expect_equal(val_label(num, 2), NULL) expect_error(val_lable(num, 1:2)) expect_equal(val_label(ch, 1, prefixed = TRUE), NULL) expect_error(val_label(ch, 1:2)) }) test_that("val_label works for for dataframe", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) xh <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) num <- 1:10 df <- data.frame(xhs = xhs, num = num, xh = xh, stringsAsFactors = FALSE) expect_true(all(sapply(val_label(df, 2), is.null))) expect_equal( val_label(df, 1), list(xhs = "Good", num = NULL, xh = "yes") ) expect_equal( val_label(df, 3, prefixed = TRUE), list(xhs = NULL, num = NULL, xh = "[3] no") ) expect_error(val_lable(df, 1:2)) }) test_that(" 'val_label<-' works properly", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) xh <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) num <- 1:10 ch <- letters[1:10] expect_error(val_label(num, "a") <- "a") expect_error(val_label(xh, 12) <- c("one", "two")) expect_error(val_label(xhs, c(12, 13)) <- "twenty_two") df <- data.frame( xhs = xhs, num = num, xh = xh, ch = ch, stringsAsFactors = FALSE ) expect_error(val_label(df, 2) <- 2) expect_error(val_label(df, 2) <- two) expect_error(val_label(df, 2) <- c("a", "b")) expect_error(val_label(df, 2:3) <- "a") sub_df <- df[, -match("ch", names(df))] v <- as.Date("2023-01-01") l <- as.Date(c("The first day of 2023" = "2023-01-01")) expect_error(val_labels(v) <- l) }) test_that(" 'val_label<-.data.frame' works properly", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) num <- 1:10 ch <- letters[1:10] df <- data.frame(xhs = xhs, num = num, ch = ch, stringsAsFactors = FALSE) valeurs <- list(xhs = "2", ch = "letter_a", num = "two") df_c <- df expect_error(val_label(df_c, 2) <- valeurs) expect_error(val_label(df_c, "a") <- valeurs) val_label(df_c, 2) <- valeurs[-2] val_label(df_c, "a") <- valeurs[2] res_labels <- list( xhs = c(Good = 1, Bad = 8, "2" = 2), num = c(two = 2), ch = c(letter_a = "a") ) expect_equal(val_labels(df_c), res_labels) }) # remove_labels -------------------------------------------------------------- test_that("remove_label works correctly", { x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" expect_false(inherits(remove_labels(x), "haven_labelled")) expect_null(var_label(remove_labels(x))) expect_equal( var_label(remove_labels(x, keep_var_label = TRUE)), var_label(x) ) }) test_that("remove_labels strips labelled attributes", { var <- labelled(c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L)) exp <- c(1L, 98L, 99L) expect_equal(remove_labels(var), exp) }) test_that("remove_labels returns variables not of class('labelled') unmodified", { # nolint var <- c(1L, 98L, 99L) expect_equal(remove_labels(var), var) }) test_that("remove_labels works with data.frame", { var <- labelled(c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L)) exp <- c(1L, 98L, 99L) df <- data.frame(var = var, exp = exp, stringsAsFactors = FALSE) rmdf <- remove_labels(df) expect_equal(rmdf$exp, exp) expect_equal(rmdf$var, exp) }) test_that("remove_labels works with labelled_spss", { xhs <- haven::labelled_spss( c(1, 2, 3, NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_values = 99, na_range = c(99, Inf), label = "A test variable" ) expect_null(var_label(remove_labels(xhs))) expect_false(identical(var_label(remove_labels(xhs)), var_label(xhs))) expect_null(val_labels(remove_labels(xhs))) }) # remove_val_labels ------------------------------------------------------------ test_that("remove_labels works properly", { var <- labelled( c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L), label = "A variable label" ) exp <- c(1L, 98L, 99L) df <- data.frame(var = var, exp = exp, stringsAsFactors = FALSE) rmdf <- remove_val_labels(df) expect_null(val_labels(rmdf$var)) expect_false(identical(rmdf$var, exp)) expect_equal(rmdf$exp, exp) }) # remove_var_label ------------------------------------------------------------ test_that("remove_labels works properly", { var <- labelled( c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L), label = "A variable label" ) exp <- c(1L, 98L, 99L) df <- data.frame(var = var, exp = exp, stringsAsFactors = FALSE) rmdf <- remove_var_label(df) expect_null(var_label(rmdf$var)) expect_false(identical(rmdf$var, exp)) expect_equal(val_labels(rmdf$var), val_labels(var)) expect_equal(rmdf$exp, exp) }) # sort_val_labels --------------------------------------------------------- test_that("sort_val_labels works properly", { df <- data.frame( lab = labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3)), num = c(3, 1, 2), stringsAsFactors = FALSE ) sdf <- sort_val_labels(df) expect_equal( val_labels(sdf), list(lab = c(yes = 1, maybe = 2, no = 3), num = NULL) ) sdf <- sort_val_labels(df, decreasing = TRUE) expect_equal( val_labels(sdf), list(lab = c(no = 3, maybe = 2, yes = 1), num = NULL) ) sdf <- sort_val_labels(df, "l") expect_equal( val_labels(sdf), list(lab = c(maybe = 2, no = 3, yes = 1), num = NULL) ) sdf <- sort_val_labels(df, "l", TRUE) expect_equal( val_labels(sdf), list(lab = c(yes = 1, no = 3, maybe = 2), num = NULL) ) x <- c(2, tagged_na("z"), 1, tagged_na("a")) val_labels(x) <- c(no = 2, refused = tagged_na("z"), yes = 1, dk = tagged_na("a")) expect_equal( sort_val_labels(x, according_to = "v") %>% val_labels() %>% format_tagged_na() %>% trimws(), c("1", "2", "NA(a)", "NA(z)"), ignore_attr = "names" ) expect_equal( sort_val_labels(x, according_to = "l") %>% val_labels() %>% names(), c("dk", "no", "refused", "yes") ) }) # remove_user_na -------------------------------------------------------------- test_that("remove_user_na works properly", { var <- labelled( c(1L, 2L, NA, 98L, 99L), c(not_answered = 98L, not_applicable = 99L), label = "A variable label" ) exp <- c(1L, 2L, NA, 98L, 99L) xhs <- haven::labelled_spss( c(1, 2, NA, 98, 99), c(t1 = 1, t2 = 2, Missing = 99), na_values = 99, na_range = c(99, Inf), label = "A test variable" ) df <- data.frame(var = var, exp = exp, xhs = xhs, stringsAsFactors = FALSE) rmtdf <- remove_user_na(df, user_na_to_na = TRUE) expect_equal(rmtdf$var, var) expect_equal(rmtdf$exp, exp) expect_null(na_values(rmtdf$xhs)) expect_equal(rmtdf$exp, exp) rmfdf <- remove_user_na(df, user_na_to_na = FALSE) expect_false(is.null(var_label(rmfdf$var))) rmfdf <- remove_user_na(df, user_na_to_tagged_na = TRUE) expect_equal( na_tag(rmfdf$xhs), c(NA, NA, NA, NA, "a") ) x <- labelled_spss(1:100, na_range = c(50, 100)) expect_warning(remove_user_na(x, user_na_to_tagged_na = TRUE)) }) # to_factor -------------------------------------------------------------------- test_that("to_factor preserves variable label", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) var_label(x) <- "yes/no" expect_equal(var_label(to_factor(x)), var_label(x)) }) test_that("strict option of to_factor works correctly", { v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) expect_s3_class(to_factor(v, strict = FALSE), "factor") expect_s3_class(to_factor(v, strict = TRUE), "haven_labelled") expect_equal(class(to_factor(v, strict = TRUE, unclass = TRUE)), "numeric") }) test_that("to_factor works on data.frame", { df <- data.frame( x = labelled(c(1, 1, 2), c(yes = 1, no = 2)), y = c("a", "a", "b"), z = 1:3, stringsAsFactors = FALSE ) df2 <- to_factor(df) expect_true(is.factor(df2$x)) expect_equal(class(df2$y), class(df$y)) expect_equal(class(df2$z), class(df$z)) df3 <- to_factor(df, labelled_only = FALSE) expect_true(is.factor(df3$y)) expect_true(is.factor(df3$z)) }) test_that("to_factor does not change a factor", { x <- factor(1:2) expect_equal(to_factor(x), x) }) test_that("to_factor keeps labels", { x <- 1:2 lab_name <- "vector" var_label(x) <- lab_name expect_equal(var_label(to_factor(x)), lab_name) }) test_that("to_factor boolean parameters", { x1 <- haven::labelled_spss( c(1, 2, 3, 5, 4, NA, 99), c(t1 = 1, t2 = 2, t5 = 5, Missing = 99), na_values = 99 ) tfx <- to_factor(x1, user_na_to_na = TRUE) expect_equal(which(is.na(tfx)), 6:7) expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5")) tfx <- to_factor(x1, nolabel_to_na = TRUE) expect_equal(which(is.na(tfx)), c(3, 5, 6)) expect_equal(levels(tfx), c("t1", "t2", "t5", "Missing")) tfx <- to_factor(x1[1:3], drop_unused_labels = FALSE) expect_equal(levels(tfx), c("t1", "t2", "3", "t5", "Missing")) tfx <- to_factor(x1[1:3], drop_unused_labels = TRUE) expect_equal(levels(tfx), c("t1", "t2", "3")) }) test_that("to_factor parameters : sort_levels + levels", { x1 <- haven::labelled_spss( c(1, 2, 3, 5, 4, NA, 99), c(t1 = 1, t2 = 2, t5 = 5, Missing = 99), na_values = 99 ) tfx <- to_factor(x1, sort_levels = "auto") expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5", "Missing")) tfx <- to_factor(x1, sort_levels = "none") expect_equal(levels(tfx), c("t1", "t2", "t5", "Missing", "3", "4")) tfx <- to_factor(x1, sort_levels = "labels") expect_equal(levels(tfx), c("3", "4", "Missing", "t1", "t2", "t5")) tfx <- to_factor(x1, sort_levels = "values") expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5", "Missing")) tfx <- to_factor(x1, levels = "labels") expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5", "Missing")) tfx <- to_factor(x1, levels = "values") expect_equal(levels(tfx), c("1", "2", "3", "4", "5", "99")) tfx <- to_factor(x1, levels = "prefixed") expect_equal( levels(tfx), c("[1] t1", "[2] t2", "[3] 3", "[4] 4", "[5] t5", "[99] Missing") ) }) test_that("to_factor() and tagged NAs", { x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) val_labels(x) <- c( yes = 1, no = 2, missing = tagged_na("a"), toto = NA ) expect_equal( to_factor(x), structure(c(1L, 2L, NA, 1L, NA, 2L, NA, NA), .Label = c("yes", "no"), class = "factor" ) ) expect_equal( to_factor(x, explicit_tagged_na = TRUE), structure(c(1L, 2L, 4L, 1L, 5L, 2L, 4L, 3L), .Label = c("yes", "no", "toto", "missing", "NA(z)"), class = "factor" ) ) }) # to_character ----------------------------------------------------------------- test_that("to_character produce an appropriate character vector", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) expect_equal(class(to_character(x)), "character") expect_equal(to_character(x), c("yes", "yes", "no")) }) test_that("to_character preserves variable label", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) var_label(x) <- "yes/no" expect_equal(var_label(to_character(x)), var_label(x)) }) test_that("to_character produce an appropriate character vector", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) expect_equal(class(to_character(x)), "character") expect_equal(to_character(x), c("yes", "yes", "no")) }) test_that("to_character default (100%)", { x <- 1:3 expect_equal(class(to_character(x)), "character") expect_equal(to_character(x), as.character(x)) }) test_that("to_character.double and explicit_tagged_na", { x <- c(1:3, tagged_na("a"), tagged_na("z")) expect_equal( to_character(x), c("1", "2", "3", NA, NA) ) expect_equal( to_character(x, explicit_tagged_na = TRUE), c("1", "2", "3", "NA(a)", "NA(z)") ) }) # set_value_labels and add_value_labels --------------------------------------- test_that("set_value_labels replaces all value labels", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_value_labels( df, s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2) ) expect_equal(val_labels(df$s1), c(Male = "M", Female = "F")) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) df <- set_value_labels(df, s2 = c(Yes = 1, Unknown = 9)) expect_equal(val_labels(df$s2), c(Yes = 1, Unknown = 9)) df <- set_value_labels(df, s1 = NULL) df <- set_value_labels(df, s2 = NULL, .null_action = "lab") expect_false(inherits(df$s1, "haven_labelled")) expect_true(inherits(df$s2, "haven_labelled")) v <- set_value_labels(1:10, c(low = 1, high = 10)) expect_equal(val_labels(v), c(low = 1, high = 10)) v <- set_value_labels(1:10, low = 1, high = 10) expect_equal(val_labels(v), c(low = 1, high = 10)) v <- set_value_labels(1:10, .labels = c(low = 1, high = 10)) expect_equal(val_labels(v), c(low = 1, high = 10)) v <- set_value_labels(v, NULL) expect_null(val_labels(v)) }) test_that("set_value_labels errors", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) expect_error( df %>% set_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) expect_error( df %>% set_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) ) # no error if .strict = FALSE expect_no_error( df %>% set_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2), .strict = FALSE ) ) expect_no_error( df %>% set_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ), .strict = FALSE ) ) }) test_that("add_value_labels errors", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) expect_error( df %>% add_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) expect_error( df %>% add_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) ) # no error if .strict = FALSE expect_no_error( df %>% add_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2), .strict = FALSE ) ) expect_no_error( df %>% add_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ), .strict = FALSE ) ) expect_error(add_value_labels(df, s1 = c("F", Male = "M"))) }) test_that("add_value_labels and remove_value_labels updates the list of value labels", { # nolint df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_value_labels( df, s1 = c(Male = "M", Female = "F"), s2 = c(Yesss = 1, No = 2) ) df <- add_value_labels(df, s2 = c(Yes = 1, Unknown = 9)) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2, Unknown = 9)) df <- remove_value_labels(df, s2 = 9) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) expect_error(remove_value_labels(df, 9)) v <- set_value_labels(1:10, low = 1, high = 10) v <- add_value_labels(v, middle = 5) v <- remove_value_labels(v, 10) expect_equal(val_labels(v), c(low = 1, middle = 5)) }) # set_variable_labels -------------------------------------------------------- test_that("set_variable_labels updates variable labels", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_variable_labels(df, s1 = "Sex", s2 = "Question") expect_equal(var_label(df$s1), "Sex") df <- set_variable_labels(df, s2 = NULL) expect_null(var_label(df$s2)) }) # missing values -------------------------------------------------------------- test_that("it is possible to define missing values if no value labels were defined", { # nolint x <- c(1, 2, 2, 9) na_values(x) <- 9 expect_equal(na_values(x), 9) x <- c(1, 2, 2, 9) na_range(x) <- 9:10 expect_equal(na_range(x), 9:10) }) test_that("na_values and na_range keep variable label", { vl <- "variable label" x <- 1:9 var_label(x) <- vl na_values(x) <- 8 na_range(x) <- c(9, Inf) expect_equal(var_label(x), vl) }) # recode (dplyr) --------------------------------------------------------------- test_that("dplyr::recode could be applied to numeric labelled vector", { x <- dplyr::recode(labelled(1:3, c(yes = 1, no = 2)), `3` = 2L) expect_equal(x, labelled(c(1L, 2L, 2L), c(yes = 1, no = 2))) }) test_that("dplyr::recode could be applied to character labelled vector", { x <- dplyr::recode( labelled(c("a", "b", "c"), c(yes = "a", no = "b")), c = "b" ) expect_equal(x, labelled(c("a", "b", "b"), c(yes = "a", no = "b"))) }) test_that("dplyr::recode could handle NA with .combine_value_labels", { x <- labelled(c(NA, 1:3), c(yes = 1, maybe = 2, no = 3)) y <- x %>% dplyr::recode(`2` = 0L, .combine_value_labels = TRUE) expect_true(all(c(0, 1, 3) %in% val_labels(y))) y <- x %>% dplyr::recode(`2` = 0L, `3` = 0L, .combine_value_labels = TRUE) expect_true(all(c(0, 1) %in% val_labels(y))) expect_equal(val_label(y, 0), "maybe / no") }) # update_labelled ---------------------------------------- test_that("update_labelled update previous haven's labelled objects but not Hmisc's labelled objects", { # nolint vhaven <- structure( 1:4, label = "label", labels = c(No = 1, Yes = 2), class = "labelled" ) vHmisc <- structure(1:4, label = "label", class = "labelled") expect_s3_class(update_labelled(vhaven), "haven_labelled") expect_s3_class(update_labelled(vHmisc), "labelled") df <- dplyr::tibble(vhaven, vHmisc) expect_s3_class(update_labelled(df)$vhaven, "haven_labelled") expect_s3_class(update_labelled(df)$vHmisc, "labelled") }) test_that("update_labelled update to haven_labelled_spss if there are na values", { # nolint v1 <- structure(1:4, label = "label", labels = c(No = 1, Yes = 2), na_values = c(8, 9), class = c("labelled_spss", "labelled") ) v2 <- structure(1:4, label = "label", labels = c(No = 1, Yes = 2), na_range = c(8, 9), class = c("labelled_spss", "labelled") ) expect_s3_class(update_labelled(v1), "haven_labelled_spss") expect_s3_class(update_labelled(v1), "haven_labelled_spss") }) test_that("update_labelled preserve variable and value labels", { v <- structure( 1:4, label = "variable label", labels = c(No = 1, Yes = 2), class = "labelled" ) expect_equal(var_label(update_labelled(v)), "variable label") expect_equal(val_labels(update_labelled(v)), c(No = 1, Yes = 2)) }) test_that("update_labelled do nothing if it's not a labelled vector", { x <- 1:10 expect_equal(update_labelled(x), x) }) test_that("update_labelled works with labelled from haven 2.0", { data(x_haven_2.0) x <- labelled(c(1, 2, 1, 2, 10, 9), c(Unknown = 9, Refused = 10)) expect_false(identical(x, x_haven_2.0)) up_x_haven_2.0 <- update_labelled(x_haven_2.0) expect_equal(x, up_x_haven_2.0) data(x_spss_haven_2.0) x2 <- labelled_spss( 1:10, c(Good = 1, Bad = 8), na_range = c(9, Inf), label = "Quality rating" ) expect_false(identical(x2, x_spss_haven_2.0)) up_x_spss_haven_2.0 <- update_labelled(x_spss_haven_2.0) expect_equal(x2, up_x_spss_haven_2.0) }) # remove_attributes ------------------------------------------------------------ test_that("remove_attributes does not transform characters into factors", { d <- data.frame( ch = structure(letters[1:2], some_attribute = TRUE), stringsAsFactors = FALSE ) d <- remove_attributes(d, "some_attribute") expect_true(is.character(d$ch)) }) # unlabelled ------------------------------------------------------------------ test_that("unlabelled works correctly", { df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), stringsAsFactors = FALSE ) df <- unlabelled(df) expect_equal(class(df$a), "numeric") expect_s3_class(df$b, "factor") expect_equal(class(df$c), "character") v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)) expect_s3_class(unlabelled(v), "factor") v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) expect_false(inherits(unlabelled(v), "haven_labelled")) expect_false(is.factor(unlabelled(1:4))) }) # remove_label ------------------------------------------ test_that("remove_label works correctly", { x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" expect_false(inherits(remove_labels(x), "haven_labelled")) expect_null(var_label(remove_labels(x))) expect_equal( var_label(remove_labels(x, keep_var_label = TRUE)), var_label(x) ) }) # recode -------------------------------------------------------------- test_that("dplyr::recode works properly with labelled vectors", { x <- labelled(1:3, c(yes = 1, no = 2)) r <- dplyr::recode(x, `3` = 2L) expect_equal(r, labelled(c(1L, 2L, 2L), val_labels(x))) r <- dplyr::recode(x, `3` = 2L, .keep_value_labels = FALSE) expect_equal(r, c(1L, 2L, 2L)) expect_warning(dplyr::recode(x, `3` = "a", .default = "b")) x <- labelled(1:4, c(a = 1, b = 2, c = 3, d = 4)) r <- dplyr::recode( x, `1` = 1L, `2` = 1L, `3` = 2L, `4` = 2L, .combine_value_labels = TRUE ) expect_equal(val_labels(r), c("a / b" = 1L, "c / d" = 2L)) r <- dplyr::recode(x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) expect_equal(val_labels(r), c("a / b" = 1L, "c / d" = 3L)) r <- dplyr::recode( x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE, .sep = " or " ) expect_equal(val_labels(r), c("a or b" = 1L, "c or d" = 3L)) y <- labelled(1:4, c(a = 1)) r <- dplyr::recode(y, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) expect_equal(val_labels(r), c(a = 1L)) }) # tidy dots -------------------------------------------------------------- test_that("functions with dots accept tidy evaluation (`!!!` operator)", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) variable_list <- list(s1 = "Sex", s2 = "Question") df <- set_variable_labels(df, !!!variable_list) expect_equal(var_label(df$s1), "Sex") expect_equal(var_label(df$s2), "Question") df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) labels_list <- list( s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2) ) df <- set_value_labels(df, !!!labels_list) expect_equal(val_labels(df$s1), c(Male = "M", Female = "F")) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_value_labels( df, s1 = c(Male = "M", Female = "F"), s2 = c(Yesss = 1, No = 2) ) added_values_list <- list(s2 = c(Yes = 1, Unknown = 9)) df <- add_value_labels(df, !!!added_values_list) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2, Unknown = 9)) removed_values_list <- list(s2 = 9) df <- remove_value_labels(df, !!!removed_values_list) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) }) # drop_unused_value_labels ------------------------------------------------ test_that("drop_unused_value_labels works properly with data.frame", { x <- labelled(c(1, 2, 2, 1), c(yes = 1, no = 2, maybe = 3)) y <- 1:4 df <- data.frame(x = x, y = y, stringsAsFactors = FALSE) ddf <- drop_unused_value_labels(df) expect_false(identical(ddf$x, x)) expect_equal(ddf$y, y) expect_false(identical(val_labels(ddf$x), val_labels(x))) expect_equal(val_labels(ddf$x), val_labels(x)[-3]) }) # nolabel_to_na ----------------------------------------------------------- test_that("nolabel_to_na works properly", { x <- labelled(c(1, 2, 9, 1, 9), c(yes = 1, no = 2)) y <- 1:5 df <- data.frame(x = x, y = y, stringsAsFactors = FALSE) nldf <- nolabel_to_na(df) expect_false(identical(nldf$x, x)) expect_equal(nldf$y, y) expect_equal(which(is.na(nldf$x)), c(3L, 5L)) }) # val_labels_to_na ----------------------------------------------------------- test_that("val_labels_to_na works properly", { x <- labelled(c(1, 2, 9, 1, 9), c(dk = 9)) y <- 1:5 df <- data.frame(x = x, y = y, stringsAsFactors = FALSE) vldf <- val_labels_to_na(df) expect_false(identical(vldf$x, x)) expect_equal(vldf$y, y) expect_null(val_labels(vldf$x)) expect_equal(which(is.na(vldf$x)), c(3L, 5L)) }) # names_prefixed_by_values ------------------------------------------------ test_that("names_prefixed_by_values works properly", { df <- dplyr::tibble( c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")), c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2)), ) res_names_prefixed <- list( c1 = c("[M] Male", "[F] Female"), c2 = c("[1] Yes", "[2] No") ) expect_equal( names_prefixed_by_values(val_labels(df)), res_names_prefixed, ignore_attr = "names" ) expect_null(names_prefixed_by_values(NULL)) }) test_that("null_action in var_label() works as expected", { df <- datasets::iris %>% set_variable_labels( Petal.Length = "length of petal", Petal.Width = "width of petal" ) expect_equal( var_label(df), list( Sepal.Length = NULL, Sepal.Width = NULL, Petal.Length = "length of petal", Petal.Width = "width of petal", Species = NULL ) ) expect_equal( var_label(df, null_action = "fi"), list( Sepal.Length = "Sepal.Length", Sepal.Width = "Sepal.Width", Petal.Length = "length of petal", Petal.Width = "width of petal", Species = "Species" ) ) expect_equal( var_label(df, null_action = "na"), list( Sepal.Length = NA_character_, Sepal.Width = NA_character_, Petal.Length = "length of petal", Petal.Width = "width of petal", Species = NA_character_ ) ) expect_equal( var_label(df, null_action = "empty"), list( Sepal.Length = "", Sepal.Width = "", Petal.Length = "length of petal", Petal.Width = "width of petal", Species = "" ) ) expect_equal( var_label(df, null_action = "skip"), list( Petal.Length = "length of petal", Petal.Width = "width of petal" ) ) expect_error(var_label(df$Species, null_action = "skip")) }) test_that("var_label works with packed columns", { d <- iris %>% tidyr::as_tibble() %>% tidyr::pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") expect_equal( label_attribute(d$Sepal), "Label of the Sepal df-column" ) d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) expect_equal( label_attribute(d$Petal$Length), "Petal length" ) expect_equal( length(var_label(d)), 3L ) expect_equal( length(var_label(d, recurse = TRUE)), 3L ) expect_equal( length(var_label(d, recurse = TRUE, unlist = TRUE)), 5L ) }) labelled/tests/testthat/test-to_labelled.r0000644000176200001440000001517314736716451020453 0ustar liggesuserstest_that("to_labelled.factor preserves variable label", { x <- factor(c(1, 1, 2)) var_label(x) <- "test" expect_equal(var_label(to_labelled(x)), var_label(x)) x <- factor(c("no", "yes", "no")) var_label(x) <- "test" expect_equal( var_label(to_labelled(x, labels = c("yes" = 1, "no" = 2))), var_label(x) ) }) test_that("to_labelled.factor preserves labelled character vectors", { s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F")) expect_equal(s1, to_labelled(to_factor(s1), val_labels(s1))) }) test_that("to_labelled.factor preserves labelled numerical vectors", { s2 <- labelled(c(1, 1, 2), c(Male = 1, Female = 2)) expect_equal(s2, to_labelled(to_factor(s2), val_labels(s2))) }) test_that("to_labelled.factor converts to NA factor levels not found in labels", { # nolint f <- factor( c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") ) expect_equal( to_labelled(f, c("yes" = 1, "no" = 2)), labelled(c(1, 1, 2, 2, NA, 2, 1, NA), c("yes" = 1, "no" = 2)) ) }) test_that("to_labelled.factor accepts non continuous labels", { f <- factor( c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") ) expect_equal( to_labelled(f, c("yes" = 1, "no" = 2, "don't know" = 9)), labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) ) }) test_that("to_labelled.factor works with '[code] label' factors", { l <- labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) expect_equal( to_factor(l, levels = "p") %>% to_labelled(), l ) l <- labelled( c("M", "M", "F", "X", "N/A"), c(Male = "M", Female = "F", Refused = "X", "Not applicable" = "N/A") ) expect_equal( to_factor(l, levels = "p") %>% to_labelled(), l ) # if labels is provided apply normal rule l <- labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) f <- to_factor(l, levels = "p") x <- f %>% to_labelled(labels = c("[1] yes" = 123, "[2] no" = 456)) expect_equal( unclass(x), c(123, 123, 456, 456, NA, 456, 123, NA), ignore_attr = "labels" ) # should not be applied if duplicates in code f <- factor(c("[1] yes", "[2] no", "[1] don't know")) expect_warning(l <- to_labelled(f)) expect_no_warning(l <- to_labelled(f, .quiet = TRUE)) expect_identical( names(val_labels(l)), levels(f) ) # check potential duplicates in numerical codes f <- factor(c("[1] yes", "[1.0] no", "[01] don't know")) expect_warning(to_labelled(f)) expect_no_warning(to_labelled(f, .quiet = TRUE)) expect_true(is.character(to_labelled(f, .quiet = TRUE))) }) # foreign_to_labelled ----------------------------------------------------- test_that("foreign_to_labelled works correctly", { utils::data("spss_file", package = "labelled") utils::data("dta_file", package = "labelled") tl_spss_list <- to_labelled(spss_file) expect_equal( val_labels(tl_spss_list), sapply(spss_file, function(x) attr(x, "value.labels", exact = TRUE)) ) expect_equal( var_label(tl_spss_list), as.list(attr(spss_file, "variable.labels", exact = TRUE)) ) miss_attr <- attr(spss_file, "missings", exact = TRUE) miss_list <- lapply( miss_attr, function(x) { if (x$type == "none") { return(NULL) } x$value } ) expect_equal(sapply(tl_spss_list, na_values), miss_list) expect_true( all( which(sapply(tl_spss_list, function(x) any(is.na(x)))) == c(4, 5, 7, 10) ) ) tl_spss_df <- to_labelled(as.data.frame(spss_file, stringsAsFactors = FALSE)) expect_equal( val_labels(tl_spss_df), sapply(spss_file, function(x) attr(x, "value.labels", exact = TRUE)) ) expect_true(all(sapply(var_label(tl_spss_df), is.null))) expect_true(all(sapply(sapply(tl_spss_df, na_values), is.null))) expect_true(all(sapply(sapply(tl_spss_df, na_range), is.null))) tl_dta_df <- to_labelled(dta_file) expect_equal( val_labels(tl_dta_df), sapply(dta_file, function(x) attr(x, "value.labels", exact = TRUE)) ) expect_equal( unname(unlist(var_label(tl_dta_df))), attr(dta_file, "var.labels", exact = TRUE) ) expect_true(all(sapply(sapply(tl_dta_df, na_values), is.null))) expect_true(all(sapply(sapply(tl_dta_df, na_range), is.null))) }) # memisc_to_labelled ----------------------------------------------------- test_that("memisc_to_labelled works correctly", { skip_if_not_installed("memisc") ds <- memisc::data.set( vote = sample(c(1, 2, 3, 8, 9, 97, 99), size = 300, replace = TRUE), region = sample(c(rep(1, 3), rep(2, 2), 3, 99), size = 300, replace = TRUE), income = exp(rnorm(300, sd = .7)) * 2000 ) memisc::description(ds$vote) <- "Vote intention" memisc::description(ds$region) <- "Region of residence" memisc::description(ds$income) <- "Household income" memisc::missing.values(ds$vote) <- c(97, 99) memisc::missing.values(ds$region) <- list(range = c(90, Inf)) memisc::labels(ds$region) <- c( England = 1, Scotland = 2, Wales = 3, "Not applicable" = 97, "Not asked in survey" = 99 ) memisc::labels(ds$vote) <- c( Conservatives = 1, Labour = 2, "Liberal Democrats" = 3, "Don't know" = 8, "Answer refused" = 9, "Not applicable" = 97, "Not asked in survey" = 99 ) tl_ds <- to_labelled(ds) desc <- data.frame(memisc::description(ds)) var_label_ds <- desc[, 2] names(var_label_ds) <- desc[, 1] expect_identical(unlist(var_label(tl_ds)), var_label_ds) if (any(sapply(val_labels(tl_ds), function(x) !is.null(x)))) { val_labels_ds <- lapply(ds, function(x) memisc::labels(x)) val_labels_ds <- lapply(ds, function(x) { vlabs <- memisc::labels(x) if (is.null(vlabs)) { return(NULL) } vals <- vlabs@values names(vals) <- vlabs@.Data vals }) expect_identical(val_labels(tl_ds), val_labels_ds) } }) test_that("to_character works on data.frame", { df <- data.frame( x = labelled(c(1, 1, 2), c(yes = 1, no = 2)), y = c("a", "a", "b"), z = 1:3, stringsAsFactors = FALSE ) df2 <- to_character(df) expect_true(is.character(df2$x)) expect_equal(class(df2$y), class(df$y)) expect_equal(class(df2$z), class(df$z)) df3 <- to_character(df, labelled_only = FALSE) expect_true(is.character(df3$y)) expect_true(is.character(df3$z)) }) labelled/tests/testthat/test-update_with.r0000644000176200001440000000323014736716451020511 0ustar liggesuserstest_that("update_variable_labels_with() works as expected", { df <- iris %>% set_variable_labels( Sepal.Length = "Length of sepal", Sepal.Width = "Width of sepal", Petal.Length = "Length of petal", Petal.Width = "Width of petal", Species = "Species" ) tmp <- df %>% update_variable_labels_with(toupper) expect_equal(var_label(tmp$Species), "SPECIES") expect_equal(var_label(tmp$Sepal.Length), "LENGTH OF SEPAL") expect_equal(var_label(tmp$Petal.Width), "WIDTH OF PETAL") tmp <- df %>% update_variable_labels_with(toupper, .cols = dplyr::starts_with("S")) expect_equal(var_label(tmp$Species), "SPECIES") expect_equal(var_label(tmp$Sepal.Length), "LENGTH OF SEPAL") expect_equal(var_label(tmp$Petal.Width), "Width of petal") tmp <- df %>% update_variable_labels_with(~ tolower(names(.x))) expect_equal(var_label(tmp$Species), "species") expect_equal(var_label(tmp$Sepal.Length), "sepal.length") tmp <- iris %>% update_variable_labels_with(~ tolower(names(.x))) expect_equal(var_label(tmp$Species), "species") expect_equal(var_label(tmp$Sepal.Length), "sepal.length") }) test_that("update_value_labels_with() works as expected", { df <- iris df$Species <- to_labelled(df$Species) tmp <- df %>% update_value_labels_with(toupper) expect_equal(val_label(tmp$Species, 1), "SETOSA") tmp <- df %>% update_value_labels_with(toupper, .cols = dplyr::starts_with("P")) expect_equal(val_label(tmp$Species, 1), "setosa") tmp <- df %>% update_value_labels_with(casefold, upper = TRUE) expect_equal(val_label(tmp$Species, 1), "SETOSA") }) labelled/tests/testthat/test-na_values.R0000644000176200001440000001170514737244525020116 0ustar liggesusers# na_values -------------------------------------------------------------------- test_that("na_values works with data.frame", { xhs <- haven::labelled_spss( c(1, 2, 3, NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_values = 99, label = "variable label" ) y <- c(1:4, NA) df <- data.frame(xhs = xhs, y = y, stringsAsFactors = FALSE) res <- list(xhs = 99, y = NULL) expect_equal(na_values(df), res) }) # na_range -------------------------------------------------------------------- test_that("na_range works with data.frame", { xhs <- haven::labelled_spss( c(1, 2, 3, NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_values = 99, na_range = c(99, Inf), label = "variable label" ) y <- c(1:4, NA) df <- data.frame(xhs = xhs, y = y, stringsAsFactors = FALSE) res <- list(xhs = c(99, Inf), y = NULL) expect_equal(na_range(df), res) }) # user_na_to_na ---------------------------------------------------------------- test_that("user_na_to_na works with data.frame", { xhs <- haven::labelled_spss( c(c(1, 2, 3), NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_values = 99, na_range = c(99, Inf), label = "variable label" ) y <- c(1:4, NA) df <- data.frame(xhs = xhs, y = y, stringsAsFactors = FALSE) una_df <- user_na_to_na(df) expect_equal(df$y, y) expect_null(na_values(una_df$xhs)) expect_null(na_range(una_df$xhs)) }) # set_na_values ---------------------------------------------------------------- test_that("set_na_values works correctly", { df <- dplyr::tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) sna_svdf <- set_na_values(svdf, s2 = 9) expect_equal(which(is.na(sna_svdf$s2)), 4L) expect_error(set_na_values(svdf, s4 = 8, s2 = 9)) sna_svdfF <- set_na_values(svdf, s4 = 8, s2 = 9, .strict = FALSE) expect_equal(sna_svdf, sna_svdfF) expect_error(set_na_values(svdf, .values = list(s2 = 9, s4 = 3))) expect_error(set_na_values(svdf, .values = c(s2 = 9))) snu_svdf <- set_na_values(sna_svdfF, s2 = NULL) expect_equal(snu_svdf, svdf) snu_svdf <- set_na_values(sna_svdf, s2 = NULL) expect_equal(snu_svdf, svdf) df <- dplyr::tibble(s1 = c(2, 4, 7, 9), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) sna_svdf <- set_na_values(svdf, .values = 9L) expect_equal(na_values(sna_svdf), list(s1 = 9, s2 = 9)) sna_svdfF <- set_na_values( svdf, .values = list(s1 = 9, s3 = 2), .strict = FALSE ) expect_equal(na_values(sna_svdfF), list(s1 = 9, s2 = NULL)) }) # set_na_range ----------------------------------------------------------------- test_that("set_na_range works correctly", { df <- dplyr::tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) expect_error(set_na_range(svdf, s2 = 9)) expect_error(set_na_range(svdf, n2 = c(9, Inf))) snr_svdf <- set_na_range(svdf, s2 = c(9, Inf)) expect_equal(na_range(snr_svdf$s2), c(9, Inf)) expect_error(set_na_range(svdf, s2 = c(9, Inf), s4 = c(0, 10))) snr_svdfF <- set_na_range(svdf, s2 = c(9, Inf), s4 = c(0, 10), .strict = FALSE) expect_equal(snr_svdfF, snr_svdf) expect_error(set_na_range(svdf, .values = c(s2 = 9))) expect_error( set_na_range(svdf, .values = list(s2 = c(9, Inf), s4 = c(0, 10))) ) snrv_svdf <- set_na_range(svdf, .values = list(s2 = c(9, Inf))) expect_equal(snrv_svdf, snr_svdf) snrv_svdfF <- set_na_range( svdf, .values = list(s2 = c(9, Inf), s4 = c(0, 10)), .strict = FALSE ) expect_equal(snrv_svdfF, snr_svdf) df <- dplyr::tibble(s1 = c(2, 4, 7, 9), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) sna_svdf <- set_na_range(svdf, .values = c(9L, 100L)) expect_equal(na_range(sna_svdf), list(s1 = c(9, 100), s2 = c(9, 100))) x <- factor(1:5) expect_error(na_values(x) <- 1) expect_error(na_range(x) <- 4:5) v <- 1:10 v <- set_na_range(v, 3, 5) v <- set_na_values(v, 8, 9) expect_equal(na_range(v), c(3, 5)) expect_equal(na_values(v), c(8, 9)) }) test_that("about user NAs", { v <- labelled_spss( c(1, 2, 9, 3, 9, 1, NA), labels = c(yes = 1, no = 3, "don't know" = 9), na_values = 9 ) expect_equal( is.na(v), c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE) ) expect_equal( is_user_na(v), c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE) ) expect_equal( user_na_to_tagged_na(v) %>% format_tagged_na() %>% trimws(), c("1", "2", "NA(a)", "3", "NA(a)", "1", "NA") ) expect_equal( user_na_to_na(letters), letters ) x <- c(NA, 9, tagged_na("a")) na_values(x) <- 9 expect_equal(is.na(x), c(TRUE, TRUE, TRUE)) expect_equal(is_regular_na(x), c(TRUE, FALSE, FALSE)) expect_equal(is_user_na(x), c(FALSE, TRUE, FALSE)) expect_equal(is_tagged_na(x), c(FALSE, FALSE, TRUE)) }) labelled/tests/testthat/test_lookfor.R0000644000176200001440000001060214736716451017672 0ustar liggesuserstest_that("look_for works correctly", { df <- data.frame( 1:3, letters[1:3], fix.empty.names = FALSE, stringsAsFactors = FALSE ) expect_error(look_for(df)) expect_error(look_for(unname(df))) df <- data.frame(num = 1:3, ch = letters[1:3], stringsAsFactors = FALSE) res <- look_for(df, "e") capture.output(print(res)) expect_true(nrow(res) == 0) }) test_that("look_for works with a single keyword.", { expect_equal( look_for(iris, "sep")$variable, c("Sepal.Length", "Sepal.Width") ) lfi <- look_for(iris, "s") expect_equal( lfi$variable, c("Sepal.Length", "Sepal.Width", "Species") ) expect_equal( lfi$levels, list( "Sepal.Length" = NULL, "Sepal.Width" = NULL, "Species" = levels(iris$Species) ) ) expect_equal(lfi$variable, names(iris)[lfi$pos]) expect_no_error( look_for(iris, "petal") %>% dplyr::select(pos) %>% print() ) }) test_that("look_for works with no single keyword.", { expect_equal( look_for(iris, details = TRUE)$variable, names(iris) ) }) test_that("look_for works with a regular expression", { lfi <- look_for(iris, "s") expect_identical(look_for(iris, "sepal|species"), lfi) lfi <- look_for(iris, "s$") expect_identical( lfi$levels[[lfi$variable]], levels(iris$Species) ) }) test_that("look_for works with several keywords", { expect_equal( look_for(iris, details = "none", "s", "w")$variable, c("Sepal.Length", "Sepal.Width", "Petal.Width", "Species") ) expect_equal( look_for(iris, "Pet", "sp", "width", ignore.case = FALSE)$variable, c("Petal.Length", "Petal.Width") ) }) test_that(" look_for with different details parameter values", { expect_false("levels" %in% names(look_for(iris, details = "none"))) expect_false("range" %in% names(look_for(iris, "Sep"))) expect_equal( look_for(iris, details = TRUE, "sep")$range, list( Sepal.Length = range(iris$Sepal.Length), Sepal.Width = range(iris$Sepal.Width) ) ) }) test_that(" convert_list_columns_to_character works correctly", { lfi_conv <- look_for(iris, "spe", details = TRUE) %>% convert_list_columns_to_character() expect_equal( unname(lfi_conv$levels), paste(levels(iris$Species), collapse = "; ") ) lfi_conv <- look_for(iris, "al", details = TRUE) %>% convert_list_columns_to_character() expect_identical( lfi_conv$range, sapply( lapply(iris[, lfi_conv$variable], range), function(x) paste(x, collapse = " - ") ) ) lfi_conv <- look_for(iris, "sep") %>% convert_list_columns_to_character() expect_true(all(lfi_conv$levels == c("", ""))) expect_true(all(lfi_conv$value_labels == c("", ""))) }) test_that(" look_for_and_select works correctly", { expect_equal( names(look_for_and_select(iris, "sep")), c("Sepal.Length", "Sepal.Width") ) }) test_that(" print.look_for works correctly", { pp <- print(look_for(iris)) expect_equal( pp$variable[nchar(pp$variable) != 0], names(iris) ) expect_equal( pp$values[nchar(pp$values) != 0], levels(iris$Species) ) }) test_that(" lookfor_to_long_format works correctly", { lf2lf <- look_for(iris) %>% lookfor_to_long_format() expect_equal( lf2lf$levels[lf2lf$variable == "Species"], levels(iris$Species) ) expect_equal( iris, lookfor_to_long_format(iris) ) expect_true(all(is.na(lf2lf$levels[lf2lf$variable != "Species"]))) }) test_that("look_for get var_label", { df <- data.frame(col1 = 1:2, col2 = 3:4, stringsAsFactors = FALSE) expect_equal(nrow(look_for(df, "lb")), 0) var_label(df) <- c("lb1", "lb2") lfd <- look_for(df, "lb") expect_equal( lfd$variable, names(df) ) expect_equal( unname(lfd$label), c("lb1", "lb2") ) }) test_that("look_for works with factor levels and value labels", { res <- iris %>% look_for("vers", details = "none") expect_equal(res$variable, "Species") res <- iris %>% look_for("vers", details = "none", values = FALSE) expect_equal(nrow(res), 0) df <- iris df$Species <- to_labelled(df$Species) res <- df %>% look_for("vers", details = "none") expect_equal(res$variable, "Species") res <- df %>% look_for("vers", details = "none", values = FALSE) expect_equal(nrow(res), 0) }) labelled/tests/testthat/test-miscellanous.R0000644000176200001440000000073014736716451020634 0ustar liggesusers# is_prefixed ------------------------------------------------------------- test_that("error with non factor argument", { x <- 1:2 expect_error(is_prefixed("x")) }) test_that("is_prefixed() works properly", { x <- labelled( c(1, 2, 2, 2, 9, 1, 2, NA), c(yes = 1, no = 2, "don't know" = 9) ) tfx <- to_factor(x, levels = "prefixed") expect_true(is_prefixed(tfx)) levels(tfx)[1] <- "not prefixed" expect_false(is_prefixed(tfx)) }) labelled/tests/testthat/test-tagged_na.r0000644000176200001440000000502614736716451020112 0ustar liggesuserstest_that("unique_tagged_na(), duplicated_tagged_na, order_tagged_na and sort_tagged_na work as expected", { # nolint x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) expect_equal( unique_tagged_na(x) %>% format_tagged_na() %>% trimws(), c("1", "2", "NA(a)", "NA(z)", "NA") ) expect_equal( unique_tagged_na(x, fromLast = TRUE) %>% format_tagged_na() %>% trimws(), c("1", "NA(z)", "2", "NA(a)", "NA") ) expect_equal( duplicated_tagged_na(x), c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE) ) expect_equal( duplicated_tagged_na(x, fromLast = TRUE), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE) ) expect_equal( sort_tagged_na(x) %>% format_tagged_na() %>% trimws(), c("1", "1", "2", "2", "NA(a)", "NA(a)", "NA(z)", "NA") ) expect_equal( sort_tagged_na(x, decreasing = TRUE) %>% format_tagged_na() %>% trimws(), c("2", "2", "1", "1", "NA(z)", "NA(a)", "NA(a)", "NA") ) expect_equal( sort_tagged_na(x, na_decreasing = TRUE) %>% format_tagged_na() %>% trimws(), c("1", "1", "2", "2", "NA(z)", "NA(a)", "NA(a)", "NA") ) expect_equal( sort_tagged_na(x, untagged_na_last = FALSE) %>% format_tagged_na() %>% trimws(), c("1", "1", "2", "2", "NA", "NA(a)", "NA(a)", "NA(z)") ) }) test_that("tagged_na_to_user_na() works as expected", { x <- c( 1, 0, 1, tagged_na("r"), 0, tagged_na("d"), NA, tagged_na("d"), tagged_na("e") ) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) expect_equal( tagged_na_to_user_na(x), labelled_spss( c(1, 0, 1, 4, 0, 2, NA, 2, 3), labels = c(no = 0, yes = 1, `don't know` = 2, refusal = 4, `NA(e)` = 3), na_range = c(2, 4) ) ) expect_equal( tagged_na_to_user_na(x, user_na_start = 8), labelled_spss( c(1, 0, 1, 10, 0, 8, NA, 8, 9), labels = c(no = 0, yes = 1, `don't know` = 8, refusal = 10, `NA(e)` = 9), na_range = c(8, 10) ) ) }) test_that("tagged_na_to_regular_na() works as expected", { y <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d")) val_labels(y) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) expect_false(any(tagged_na_to_regular_na(y) %>% is_tagged_na())) test <- rep(c(-99, -99, 3, 5, -1), 120) labelled::na_values(test) <- c(-99, -1) expect_no_warning( user_na_to_tagged_na(test) ) }) labelled/tests/testthat/test-recode_if.r0000644000176200001440000000272414736716451020122 0ustar liggesuserstest_that("recode_if() works as expected", { x <- labelled(c(1, 2, 2, 9), c(yes = 1, no = 2)) y <- x %>% recode_if(x == 9, NA) expect_equal( y, labelled(c(1, 2, 2, NA), c(yes = 1, no = 2)) ) y <- x %>% recode_if(1:4 < 3, 11:14) expect_equal( y, labelled(c(11, 12, 2, 9), c(yes = 1, no = 2)) ) x <- c("A", "B", "C") expect_equal( x %>% recode_if(c(TRUE, FALSE, NA), "Z"), c("Z", "B", "C") ) expect_equal( x %>% recode_if(c(TRUE, FALSE, NA), 0), c("0", "B", "C") ) }) test_that("recode_if() preserve value and variable labels", { x <- labelled_spss(c(1, 2, 2, 8, 9), c(yes = 1, no = 2), na_values = 9) var_label(x) <- "variable label" y <- x %>% recode_if(unclass(x) == 8, NA) expect_equal(var_label(x), var_label(y)) expect_equal(val_labels(x), val_labels(y)) expect_equal(na_values(x), na_values(y)) expect_equal(na_range(x), na_range(y)) }) test_that("recode_if() checks", { expect_no_error( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 9L) ) expect_no_error( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 11:13) ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE, "NA"), 9) ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE), 9) ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 10:11) ) expect_warning( 1:3 %>% recode_if(c(TRUE, FALSE, NA), "char") ) expect_warning( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 9) ) }) labelled/tests/testthat.R0000644000176200001440000000063014736716451015160 0ustar liggesusers# This file is part of the standard setup for testthat. # It is recommended that you do not modify it. # # Where should you do additional test configuration? # Learn more about the roles of various files in: # * https://r-pkgs.org/testing-design.html#sec-tests-files-overview # * https://testthat.r-lib.org/articles/special-files.html library(testthat) library(labelled) test_check("labelled") labelled/tests/spelling.R0000644000176200001440000000024014466735327015135 0ustar liggesusersif (requireNamespace("spelling", quietly = TRUE)) { spelling::spell_check_test( vignettes = TRUE, error = FALSE, skip_on_cran = TRUE ) } labelled/MD50000644000176200001440000001264514737436072012353 0ustar liggesusersd284e90a13a0d8e4a349bc99bf4ea974 *DESCRIPTION 45e41848717e7dc66e197420088dc216 *NAMESPACE 1da435b25489e9e266cc8b0578995b22 *NEWS.md f06f8ccc87b0b72eca2089018135670b *R/copy_labels.R 09ee363d0780dd1bac729153253b9c38 *R/data.R 91179eef650f9f96920444a366a694f3 *R/drop_unused_value_labels.R 7ed7d1a66791465efb317a8ec459cb8c *R/import-standalone-obj-type.R c18fe32d0827a9e7ed80ab05ab1966f7 *R/import-standalone-types-check.R 926829f6c475b3453c5c22ab86b1f5bb *R/is_prefixed.R d91025871bd3294794de83148e7439ae *R/labelled-package.R c159cbecf3d962eea36acf1f459a9a53 *R/labelled.R bd0f47284da4751ab0254d89d961be95 *R/lookfor.R 9c84d825529c680aa361eda146921a35 *R/na_values.R a1a65be27ab5b13c2779eb4e3fa33537 *R/recode.R 2d66bd6bf8ee6c30a9c7243ff787c8f8 *R/recode_if.R dfb9e80d4abcf6091f5b3253c1572895 *R/remove_attributes.R 5c6113ea6dd0920835711b58867d5676 *R/remove_labels.R 1dba70b418b38684f0c9d8813f8e5aa6 *R/retrocompatibility.R 9bd740334738f5a82fa57a96e5768bd7 *R/tagged_na.R 55cd9af2f23b9c64c143fbdbc059ff93 *R/to_character.R 81db6536b466f702e382dddf75549bdf *R/to_factor.R b4bb9128ef2e6f2d4d07abc16103dd5f *R/to_labelled.R af386202aa63e30613ef94cb3ace16ab *R/to_na.R 5b2aa3619b3bd909cdc4ac0135419acb *R/update_with.R 7aeadfd7fa9b3d3304851b0b406c0978 *R/val_labels.R 94f2d972ccf5ade176ef5be1dce206f2 *R/var_label.R 1353281161ea5005cb8c1e2bd4af854d *README.md 0bc8e12e8aec2678e86b989f271922ae *build/vignette.rds fd37eb471c738491c60f1b134fc52db7 *data/dta_file.rda 8141b7f85503c9012d892c4f73d05b80 *data/spss_file.rda a0ca560210ed4b201c51fb398692d305 *data/x_haven_2.0.rda cc704a75d2ec00eb4ce538756df69ef3 *data/x_spss_haven_2.0.rda 809b5d6cf784fe292f745dcb3fbfa261 *inst/WORDLIST 957aa403c010b3f21d28082865077a98 *inst/doc/labelled.R ab253ad4c84d8da8b0cc4debf79c4289 *inst/doc/labelled.Rmd df2f32a009622b1391eb27aacd95941d *inst/doc/labelled.html f92ef0fbb814540abb8ac19c0131b979 *inst/doc/look_for.R 4e3f72fc6074e218f0e0065cfa2838df *inst/doc/look_for.Rmd fd8fd97152b28e4d7552da4a1f21d75f *inst/doc/look_for.html bfc4b8d2ecd61e1590c4d2f4af9b69b6 *inst/doc/missing_values.R fb65203c6b65ea639cbf03e2d54d8ddb *inst/doc/missing_values.Rmd 7f18ffadc8291bb399c71b891704c477 *inst/doc/missing_values.html 5ea8171f36373f71f3c230c8f87878de *inst/doc/packed_columns.R f46e9b655437b76429ada5508f319027 *inst/doc/packed_columns.Rmd ee32f5790e7ada33f22a6a745801eab0 *inst/doc/packed_columns.html bd70c40d49d42123da3a3c4e79af5fb5 *man/copy_labels.Rd e4ad038e5f05d0dc17ec00ce216e517d *man/drop_unused_value_labels.Rd cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg 051f5a77298a1855400207ca4f9fb27f *man/figures/logo.png 557b96e5590bbe43a2ab6029003dacb1 *man/figures/logo.svg fe321cd6e9909debb62538377ef60bca *man/is_prefixed.Rd b91b11844f2e791055aaae4dc2353b14 *man/look_for.Rd 34b9830fee308152e391fdc44fcf2006 *man/na_values.Rd 63c7200e2fa1ce957e0bc547e12f8bb6 *man/names_prefixed_by_values.Rd 5a4cd9d686f9b27f472f73fa510d0098 *man/nolabel_to_na.Rd cd989c15aace5473e68605e719519aa4 *man/recode.haven_labelled.Rd f7c46e8787a98ef1cbe212d6cd60f3ab *man/recode_if.Rd c75242e227838467bcf2357580a22a7f *man/reexports.Rd 5f929af8fd2537259078dd0c299c418a *man/remove_attributes.Rd e6c325bc25e8cf4b5187067d88013eae *man/remove_labels.Rd 28d7a7106c69a79cb8584bfccf62a256 *man/sort_val_labels.Rd 732566c53152f8e6bfe1a1df648ed4d6 *man/tagged_na_to_user_na.Rd cbeeab50b3f169b8795016f2247125f6 *man/test_datasets.Rd 83ebf508b14d68b26b3d0e6dc2cfd8bb *man/to_character.Rd 1a78956ca7a26a8c5bf656f9da168deb *man/to_factor.Rd 9553797e69e6d5c109bff97d8ca2888d *man/to_labelled.Rd 762668221135d56731a6c171ae46addf *man/unique_tagged_na.Rd b6d620d00c03c1eb6f372bd257f2b1e9 *man/update_labelled.Rd b0c10b83e9d9c1d084f5565875d1b5f2 *man/update_variable_labels_with.Rd 7e81aec1eb5aa9d8697de63a42840a17 *man/val_labels.Rd 40461d1267a071fec39efce62830b231 *man/val_labels_to_na.Rd 1dd347164e618b65f83627116dcc8db2 *man/var_label.Rd 52e4cfc6848ac432bf7398e4f7b41889 *tests/spelling.R 4659bd23329c4454baa1819e3da2df34 *tests/testthat.R 219c9b7cd08645492bbf4deefc5add5f *tests/testthat/test-copy_labels.r ab0f191a35ec5f27f746fff7bc5f8b6a *tests/testthat/test-labelled.r 6fd5269c492895ed34ef78978f8eb733 *tests/testthat/test-miscellanous.R 2b809e139569da201f1ff85790ed9c12 *tests/testthat/test-na_values.R 1ff72e80d390ddb60690039290c55600 *tests/testthat/test-recode_if.r 3534d40200f1fb29424f275f6a636140 *tests/testthat/test-tagged_na.r d28940f993eff8b6c2218d838a6e7223 *tests/testthat/test-to_labelled.r 3eac94af5fe4ed8a127b2daeaed3a541 *tests/testthat/test-update_with.r 3ec61f7613d855c64db4538faf8faabb *tests/testthat/test_lookfor.R 06c65f6d55ce89acddd6264225d28355 *vignettes/approaches.drawio d184c748e7df6de59878897889003dad *vignettes/approaches.png ab253ad4c84d8da8b0cc4debf79c4289 *vignettes/labelled.Rmd 4e3f72fc6074e218f0e0065cfa2838df *vignettes/look_for.Rmd fb65203c6b65ea639cbf03e2d54d8ddb *vignettes/missing_values.Rmd f46e9b655437b76429ada5508f319027 *vignettes/packed_columns.Rmd labelled/R/0000755000176200001440000000000014737244525012234 5ustar liggesuserslabelled/R/to_character.R0000644000176200001440000000774614466735327015037 0ustar liggesusers#' Convert input to a character vector #' #' By default, `to_character()` is a wrapper for [base::as.character()]. #' For labelled vector, to_character allows to specify if value, labels or #' labels prefixed with values should be used for conversion. #' #' @param x Object to coerce to a character vector. #' @param ... Other arguments passed down to method. #' @param explicit_tagged_na should tagged NA be kept? #' @export to_character <- function(x, ...) { UseMethod("to_character") } #' @export to_character.default <- function(x, ...) { vl <- var_label(x) x <- as.character(x) var_label(x) <- vl x } #' @rdname to_character #' @export to_character.double <- function(x, explicit_tagged_na = FALSE, ...) { res <- as.character(x) if (explicit_tagged_na) { res[is_tagged_na(x)] <- format_tagged_na(x[is_tagged_na(x)]) } var_label(res) <- var_label(x) names(res) <- names(x) res } #' @rdname to_character #' @param levels What should be used for the factor levels: the labels, the #' values or labels prefixed with values? #' @param nolabel_to_na Should values with no label be converted to `NA`? #' @param user_na_to_na user defined missing values into NA? #' @details #' If some values doesn't have a label, automatic labels will be created, #' except if `nolabel_to_na` is `TRUE`. #' @examples #' v <- labelled( #' c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), #' c(yes = 1, no = 3, "don't know" = 9) #' ) #' to_character(v) #' to_character(v, nolabel_to_na = TRUE) #' to_character(v, "v") #' to_character(v, "p") #' @export to_character.haven_labelled <- function( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, ...) { vl <- var_label(x) levels <- match.arg(levels) x <- as.character(to_factor( x, levels = levels, nolabel_to_na = nolabel_to_na, user_na_to_na = user_na_to_na, explicit_tagged_na = explicit_tagged_na )) var_label(x) <- vl x } #' @rdname to_character #' @param labelled_only for a data.frame, convert only labelled variables to #' factors? #' @details #' When applied to a data.frame, only labelled vectors are converted by #' default to character. Use `labelled_only = FALSE` to convert all variables #' to characters. #' @export #' @examples #' #' df <- data.frame( #' a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), #' b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), #' c = labelled( #' c("a", "a", "b", "c"), #' labels = c(No = "a", Maybe = "b", Yes = "c") #' ), #' d = 1:4, #' e = factor(c("item1", "item2", "item1", "item2")), #' f = c("itemA", "itemA", "itemB", "itemB"), #' stringsAsFactors = FALSE #' ) #' #' if (require(dplyr)) { #' glimpse(df) #' glimpse(to_character(df)) #' glimpse(to_character(df, labelled_only = FALSE)) #' } to_character.data.frame <- function( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, labelled_only = TRUE, ...) { cl <- class(x) x <- dplyr::as_tibble( lapply( x, .to_character_col_data_frame, levels = levels, nolabel_to_na = nolabel_to_na, user_na_to_na = user_na_to_na, explicit_tagged_na = explicit_tagged_na, labelled_only = labelled_only, ... ) ) class(x) <- cl x } .to_character_col_data_frame <- function( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, labelled_only = TRUE, ...) { if (inherits(x, "haven_labelled")) { x <- to_character(x, levels = levels, nolabel_to_na = nolabel_to_na, user_na_to_na = user_na_to_na, explicit_tagged_na = explicit_tagged_na, ... ) } else if (!labelled_only) { x <- to_character(x) } x } labelled/R/import-standalone-types-check.R0000644000176200001440000003154614736716451020246 0ustar liggesusers# Standalone file: do not edit by hand # Source: https://github.com/r-lib/rlang/blob/HEAD/R/standalone-types-check.R # Generated by: usethis::use_standalone("r-lib/rlang", "types-check") # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-types-check.R # last-updated: 2023-03-13 # license: https://unlicense.org # dependencies: standalone-obj-type.R # imports: rlang (>= 1.1.0) # --- # # ## Changelog # # 2024-08-15: # - `check_character()` gains an `allow_na` argument (@martaalcalde, #1724) # # 2023-03-13: # - Improved error messages of number checkers (@teunbrand) # - Added `allow_infinite` argument to `check_number_whole()` (@mgirlich). # - Added `check_data_frame()` (@mgirlich). # # 2023-03-07: # - Added dependency on rlang (>= 1.1.0). # # 2023-02-15: # - Added `check_logical()`. # # - `check_bool()`, `check_number_whole()`, and # `check_number_decimal()` are now implemented in C. # # - For efficiency, `check_number_whole()` and # `check_number_decimal()` now take a `NULL` default for `min` and # `max`. This makes it possible to bypass unnecessary type-checking # and comparisons in the default case of no bounds checks. # # 2022-10-07: # - `check_number_whole()` and `_decimal()` no longer treat # non-numeric types such as factors or dates as numbers. Numeric # types are detected with `is.numeric()`. # # 2022-10-04: # - Added `check_name()` that forbids the empty string. # `check_string()` allows the empty string by default. # # 2022-09-28: # - Removed `what` arguments. # - Added `allow_na` and `allow_null` arguments. # - Added `allow_decimal` and `allow_infinite` arguments. # - Improved errors with absent arguments. # # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Added changelog. # # nocov start # Scalars ----------------------------------------------------------------- .standalone_types_check_dot_call <- .Call check_bool <- function(x, ..., allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x) && .standalone_types_check_dot_call(ffi_standalone_is_bool_1.0.7, x, allow_na, allow_null)) { return(invisible(NULL)) } stop_input_type( x, c("`TRUE`", "`FALSE`"), ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_string <- function(x, ..., allow_empty = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = allow_empty, allow_na = allow_na, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a single string", ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .rlang_check_is_string <- function(x, allow_empty, allow_na, allow_null) { if (is_string(x)) { if (allow_empty || !is_string(x, "")) { return(TRUE) } } if (allow_null && is_null(x)) { return(TRUE) } if (allow_na && (identical(x, NA) || identical(x, na_chr))) { return(TRUE) } FALSE } check_name <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { is_string <- .rlang_check_is_string( x, allow_empty = FALSE, allow_na = FALSE, allow_null = allow_null ) if (is_string) { return(invisible(NULL)) } } stop_input_type( x, "a valid name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } IS_NUMBER_true <- 0 IS_NUMBER_false <- 1 IS_NUMBER_oob <- 2 check_number_decimal <- function(x, ..., min = NULL, max = NULL, allow_infinite = TRUE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = TRUE, min, max, allow_infinite, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = TRUE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_number_whole <- function(x, ..., min = NULL, max = NULL, allow_infinite = FALSE, allow_na = FALSE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (missing(x)) { exit_code <- IS_NUMBER_false } else if (0 == (exit_code <- .standalone_types_check_dot_call( ffi_standalone_check_number_1.0.7, x, allow_decimal = FALSE, min, max, allow_infinite, allow_na, allow_null ))) { return(invisible(NULL)) } .stop_not_number( x, ..., exit_code = exit_code, allow_decimal = FALSE, min = min, max = max, allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } .stop_not_number <- function(x, ..., exit_code, allow_decimal, min, max, allow_na, allow_null, arg, call) { if (allow_decimal) { what <- "a number" } else { what <- "a whole number" } if (exit_code == IS_NUMBER_oob) { min <- min %||% -Inf max <- max %||% Inf if (min > -Inf && max < Inf) { what <- sprintf("%s between %s and %s", what, min, max) } else if (x < min) { what <- sprintf("%s larger than or equal to %s", what, min) } else if (x > max) { what <- sprintf("%s smaller than or equal to %s", what, max) } else { abort("Unexpected state in OOB check", .internal = TRUE) } } stop_input_type( x, what, ..., allow_na = allow_na, allow_null = allow_null, arg = arg, call = call ) } check_symbol <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a symbol", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_arg <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_symbol(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an argument name", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_call <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_call(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a defused call", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_environment <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_environment(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an environment", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_function <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_function(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a function", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_closure <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_closure(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "an R function", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_formula <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_formula(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a formula", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } # Vectors ----------------------------------------------------------------- # TODO: Figure out what to do with logical `NA` and `allow_na = TRUE` check_character <- function(x, ..., allow_na = TRUE, allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_character(x)) { if (!allow_na && any(is.na(x))) { abort( sprintf("`%s` can't contain NA values.", arg), arg = arg, call = call ) } return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a character vector", ..., allow_null = allow_null, arg = arg, call = call ) } check_logical <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is_logical(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a logical vector", ..., allow_na = FALSE, allow_null = allow_null, arg = arg, call = call ) } check_data_frame <- function(x, ..., allow_null = FALSE, arg = caller_arg(x), call = caller_env()) { if (!missing(x)) { if (is.data.frame(x)) { return(invisible(NULL)) } if (allow_null && is_null(x)) { return(invisible(NULL)) } } stop_input_type( x, "a data frame", ..., allow_null = allow_null, arg = arg, call = call ) } # nocov end labelled/R/to_factor.R0000644000176200001440000002063214737244525014342 0ustar liggesusers#' Convert input to a factor. #' #' The base function [base::as.factor()] is not a generic, but this variant #' is. By default, `to_factor()` is a wrapper for [base::as.factor()]. #' Please note that `to_factor()` differs slightly from [haven::as_factor()] #' method provided by \pkg{haven} package. #' #' @param x Object to coerce to a factor. #' @param ... Other arguments passed down to method. #' @export to_factor <- function(x, ...) { UseMethod("to_factor") } #' @export to_factor.factor <- function(x, ...) { x } #' @export to_factor.default <- function(x, ...) { vl <- var_label(x) x <- as.factor(x) var_label(x) <- vl x } #' @rdname to_factor #' @param levels What should be used for the factor levels: the labels, the #' values or labels prefixed with values? #' @param ordered `TRUE` for ordinal factors, `FALSE` (default) for nominal #' factors. #' @param nolabel_to_na Should values with no label be converted to `NA`? #' @param sort_levels How the factor levels should be sorted? (see Details) #' @param decreasing Should levels be sorted in decreasing order? #' @param drop_unused_labels Should unused value labels be dropped? #' (applied only if `strict = FALSE`) #' @param user_na_to_na Convert user defined missing values into `NA`? #' @param strict Convert to factor only if all values have a defined label? #' @param unclass If not converted to a factor (when `strict = TRUE`), #' convert to a character or a numeric factor by applying [base::unclass()]? #' @param explicit_tagged_na Should tagged NA (cf. [haven::tagged_na()]) be #' kept as explicit factor levels? #' @details #' If some values doesn't have a label, automatic labels will be created, #' except if `nolabel_to_na` is `TRUE`. #' #' If `sort_levels == 'values'`, the levels will be sorted according to the #' values of `x`. #' If `sort_levels == 'labels'`, the levels will be sorted according to #' labels' names. #' If `sort_levels == 'none'`, the levels will be in the order the value #' labels are defined in `x`. If some labels are automatically created, they #' will be added at the end. #' If `sort_levels == 'auto'`, `sort_levels == 'none'` will be used, except #' if some values doesn't have a defined label. In such case, #' `sort_levels == 'values'` will be applied. #' @examples #' v <- labelled( #' c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), #' c(yes = 1, no = 3, "don't know" = 9) #' ) #' to_factor(v) #' to_factor(v, nolabel_to_na = TRUE) #' to_factor(v, "p") #' to_factor(v, sort_levels = "v") #' to_factor(v, sort_levels = "n") #' to_factor(v, sort_levels = "l") #' #' x <- labelled(c("H", "M", "H", "L"), c(low = "L", medium = "M", high = "H")) #' to_factor(x, ordered = TRUE) #' #' # Strict conversion #' v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) #' to_factor(v) #' to_factor(v, strict = TRUE) # Not converted because 3 does not have a label #' to_factor(v, strict = TRUE, unclass = TRUE) #' @export to_factor.haven_labelled <- function( x, levels = c( "labels", "values", "prefixed" ), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, drop_unused_labels = FALSE, user_na_to_na = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ...) { vl <- var_label(x) levels <- match.arg(levels) sort_levels <- match.arg(sort_levels) if (user_na_to_na) { x <- user_na_to_na(x) } if (explicit_tagged_na && is.double(x)) { new_labels <- to_character(val_labels(x), explicit_tagged_na = TRUE) x <- to_character(unclass(x), explicit_tagged_na = TRUE) if (anyNA(new_labels)) { # regular NA with a label x[is.na(x)] <- "NA" new_labels[is.na(new_labels)] <- "NA" } val_labels(x) <- new_labels } else { l <- val_labels(x) val_labels(x) <- l[!is.na(l)] # keeping not NA values } if (strict) { allval <- unique(x) allval <- allval[!is.na(allval)] nolabel <- allval[!allval %in% val_labels(x)] if (length(nolabel) > 0) { if (unclass) { x <- unclass(x) } return(x) } } if (nolabel_to_na) { x <- nolabel_to_na(x) } labels <- val_labels(x) allval <- unique(x) allval <- allval[!is.na(allval)] nolabel <- sort(allval[!allval %in% labels]) # if there are some values with no label if (length(nolabel) > 0) { names(nolabel) <- as.character(nolabel) levs <- c(labels, nolabel) } else { levs <- labels } if (sort_levels == "auto" && length(nolabel) > 0) { sort_levels <- "values" } if (sort_levels == "labels") { levs <- levs[order(names(levs), decreasing = decreasing)] } if (sort_levels == "values") { levs <- sort(levs, decreasing = decreasing) } if (levels == "labels") { labs <- names(levs) } if (levels == "values") { labs <- unname(levs) } if (levels == "prefixed") { labs <- names_prefixed_by_values(levs) } levs <- unname(levs) x <- factor(x, levels = levs, labels = labs, ordered = ordered, ... ) if (drop_unused_labels) { x <- droplevels(x) } var_label(x) <- vl x } #' @rdname to_factor #' @param labelled_only for a data.frame, convert only labelled variables to #' factors? #' @details #' When applied to a data.frame, only labelled vectors are converted by #' default to a factor. Use `labelled_only = FALSE` to convert all variables #' to factors. #' @export to_factor.data.frame <- function( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, labelled_only = TRUE, drop_unused_labels = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ...) { cl <- class(x) x <- dplyr::as_tibble( lapply( x, .to_factor_col_data_frame, levels = levels, ordered = ordered, nolabel_to_na = nolabel_to_na, sort_levels = sort_levels, decreasing = decreasing, labelled_only = labelled_only, drop_unused_labels = drop_unused_labels, strict = strict, unclass = unclass, explicit_tagged_na = explicit_tagged_na, ... ) ) class(x) <- cl x } .to_factor_col_data_frame <- function( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, labelled_only = TRUE, drop_unused_labels = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ...) { if (inherits(x, "haven_labelled")) { x <- to_factor(x, levels = levels, ordered = ordered, nolabel_to_na = nolabel_to_na, sort_levels = sort_levels, decreasing = decreasing, drop_unused_labels = drop_unused_labels, strict = strict, unclass = unclass, explicit_tagged_na = explicit_tagged_na, ... ) } else if (!labelled_only) { x <- to_factor(x) } x } #' @rdname to_factor #' @description #' `unlabelled(x)` is a shortcut for #' `to_factor(x, strict = TRUE, unclass = TRUE, labelled_only = TRUE)`. #' @details #' `unlabelled()` is a shortcut for quickly removing value labels of a vector #' or of a data.frame. If all observed values have a value label, then the #' vector will be converted into a factor. Otherwise, the vector will be #' unclassed. #' If you want to remove value labels in all cases, use [remove_val_labels()]. #' @examples #' #' df <- data.frame( #' a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), #' b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), #' c = labelled( #' c("a", "a", "b", "c"), #' labels = c(No = "a", Maybe = "b", Yes = "c") #' ), #' d = 1:4, #' e = factor(c("item1", "item2", "item1", "item2")), #' f = c("itemA", "itemA", "itemB", "itemB"), #' stringsAsFactors = FALSE #' ) #' if (require(dplyr)) { #' glimpse(df) #' glimpse(unlabelled(df)) #' } #' @export unlabelled <- function(x, ...) { if (is.data.frame(x)) { to_factor(x, strict = TRUE, unclass = TRUE, labelled_only = TRUE, ...) } else if (inherits(x, "haven_labelled")) { to_factor(x, strict = TRUE, unclass = TRUE, ...) } else { x } } labelled/R/remove_labels.R0000644000176200001440000001402114736751266015200 0ustar liggesusers#' Remove variable label, value labels and user defined missing values #' #' Use `remove_var_label()` to remove variable label, `remove_val_labels()` #' to remove value labels, `remove_user_na()` to remove user defined missing #' values (*na_values* and *na_range*) and `remove_labels()` to remove all. #' #' @param x A vector or a data frame. #' @param user_na_to_na Convert user defined missing values into `NA`? #' @param user_na_to_tagged_na Convert user defined missing values into #' tagged `NA`? It could be applied only to numeric vectors. Note that integer #' labelled vectors will be converted to double labelled vectors. #' @param keep_var_label Keep variable label? #' @details #' Be careful with `remove_user_na()` and `remove_labels()`, user defined #' missing values will not be automatically converted to `NA`, except if you #' specify `user_na_to_na = TRUE`. #' `user_na_to_na(x)` is an equivalent of #' `remove_user_na(x, user_na_to_na = TRUE)`. #' #' If you prefer to convert variables with value labels into factors, use #' [to_factor()] or use [unlabelled()]. #' @examples #' x <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) #' var_label(x) <- "A variable" #' x #' #' remove_labels(x) #' remove_labels(x, user_na_to_na = TRUE) #' remove_user_na(x, user_na_to_na = TRUE) #' remove_user_na(x, user_na_to_tagged_na = TRUE) #' @export remove_labels <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE) { UseMethod("remove_labels") } #' @export remove_labels.default <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE) { if (!keep_var_label) var_label(x) <- NULL val_labels(x) <- NULL attr(x, "format.spss") <- NULL x } #' @export remove_labels.haven_labelled_spss <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE) { x <- remove_user_na( x, user_na_to_na = user_na_to_na, user_na_to_tagged_na = user_na_to_tagged_na ) if (!keep_var_label) var_label(x) <- NULL val_labels(x) <- NULL attr(x, "format.spss") <- NULL x } #' @export remove_labels.data.frame <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE) { x[] <- lapply( x, remove_labels, user_na_to_na = user_na_to_na, keep_var_label = keep_var_label, user_na_to_tagged_na = user_na_to_tagged_na ) x } #' @rdname remove_labels #' @export remove_var_label <- function(x) { UseMethod("remove_var_label") } #' @export remove_var_label.default <- function(x) { var_label(x) <- NULL x } #' @export remove_var_label.data.frame <- function(x) { x[] <- lapply(x, remove_var_label) x } #' @rdname remove_labels #' @export remove_val_labels <- function(x) { UseMethod("remove_val_labels") } #' @export remove_val_labels.default <- function(x) { val_labels(x) <- NULL x } #' @export remove_val_labels.data.frame <- function(x) { x[] <- lapply(x, remove_val_labels) x } #' @rdname remove_labels #' @export remove_user_na <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE) { UseMethod("remove_user_na") } #' @export remove_user_na.default <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE) { # do nothing x } #' @export remove_user_na.haven_labelled_spss <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE) { if (user_na_to_tagged_na) { if (typeof(x) == "character") cli::cli_abort(paste( "{.fn user_na_to_tagged_na} cannot be used with", "character labelled vectors." )) val_to_tag <- x[is.na(x) & !is.na(unclass(x))] %>% unclass() %>% unique() %>% sort() if (length(val_to_tag) > 26) { cli::cli_warn(c( "{length(val_to_tag)} different user-defined missing values found in 'x'.", i = "A maximum of 26 could be tagged.", "!" = "'user_na_to_tagged_na' has been ignored.", i = "'user_na_to_na = TRUE' has been used instead." )) user_na_to_na <- TRUE } else { if (is.integer(x)) { x <- as.double(unclass(x)) %>% copy_labels_from(x) cli::cli_inform(c( i = "{.arg x} has been converted into a double vector." )) val_to_tag <- as.double(val_to_tag) } user_na_to_na <- FALSE vl <- val_labels(x) x <- remove_user_na(x) # to avoid error when combining labelled_spss for (i in seq_along(val_to_tag)) { x[x == val_to_tag[i]] <- tagged_na(letters[i]) if (val_to_tag[i] %in% vl) { vl[vl == val_to_tag[i]] <- tagged_na(letters[i]) } } val_labels(x) <- vl } } if (user_na_to_na) { # removing value labels attached to user_na for ( val in val_labels(x)[test_if_user_na(val_labels(x), na_values(x), na_range(x))] ) { val_label(x, val) <- NULL } x[is.na(x)] <- NA } na_values(x) <- NULL na_range(x) <- NULL x } #' @export remove_user_na.data.frame <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE) { x[] <- lapply( x, remove_user_na, user_na_to_na = user_na_to_na, user_na_to_tagged_na = user_na_to_tagged_na ) x } labelled/R/lookfor.R0000644000176200001440000003216114737244525014035 0ustar liggesusers#' Look for keywords variable names and descriptions / Create a data dictionary #' #' `look_for` emulates the `lookfor` Stata command in \R. It supports #' searching into the variable names of regular \R data frames as well as into #' variable labels descriptions, factor levels and value labels. #' The command is meant to help users finding variables in large datasets. #' #' When no keyword is provided, it will produce a data dictionary of the overall #' data frame. #' #' @param data a data frame or a survey object #' @param ... optional list of keywords, a character string (or several #' character strings), which can be formatted as a regular expression suitable #' for a [base::grep()] pattern, or a vector of keywords; #' displays all variables if not specified #' @param labels whether or not to search variable labels (descriptions); #' `TRUE` by default #' @param values whether or not to search within values (factor levels or value #' labels); `TRUE` by default #' @param ignore.case whether or not to make the keywords case sensitive; #' `TRUE` by default (case is ignored during matching) #' @param details add details about each variable (full details could be time #' consuming for big data frames, `FALSE` is equivalent to `"none"` #' and `TRUE` to `"full"`) #' @param x a tibble returned by `look_for()` #' @return a tibble data frame featuring the variable position, name and #' description (if it exists) in the original data frame #' @seealso `vignette("look_for")` #' @details The function looks into the variable names for matches to the #' keywords. If available, variable labels are included in the search scope. #' Variable labels of data.frame imported with \pkg{foreign} or #' \pkg{memisc} packages will also be taken into account (see [to_labelled()]). #' If no keyword is provided, it will return all variables of `data`. #' #' `look_for()`, `lookfor()` and `generate_dictionary()` are equivalent. #' #' By default, results will be summarized when printing. To deactivate default #' printing, use `dplyr::as_tibble()`. #' #' `lookfor_to_long_format()` could be used to transform results with one row #' per factor level and per value label. #' #' Use `convert_list_columns_to_character()` to convert named list columns into #' character vectors (see examples). #' #' `look_for_and_select()` is a shortcut for selecting some variables and #' applying `dplyr::select()` to return a data frame with only the selected #' variables. #' #' @author François Briatte , #' Joseph Larmarange #' @examples #' look_for(iris) #' #' # Look for a single keyword. #' look_for(iris, "petal") #' look_for(iris, "s") #' iris %>% #' look_for_and_select("s") %>% #' head() #' #' # Look for with a regular expression #' look_for(iris, "petal|species") #' look_for(iris, "s$") #' #' # Look for with several keywords #' look_for(iris, "pet", "sp") #' look_for(iris, "pet", "sp", "width") #' look_for(iris, "Pet", "sp", "width", ignore.case = FALSE) #' #' # Look_for can search within factor levels or value labels #' look_for(iris, "vers") #' #' # Quicker search without variable details #' look_for(iris, details = "none") #' #' # To obtain more details about each variable #' look_for(iris, details = "full") #' #' # To deactivate default printing, convert to tibble #' look_for(iris, details = "full") %>% #' dplyr::as_tibble() #' #' # To convert named lists into character vectors #' look_for(iris) %>% convert_list_columns_to_character() #' #' # Long format with one row per factor and per value label #' look_for(iris) %>% lookfor_to_long_format() #' #' # Both functions can be combined #' look_for(iris) %>% #' lookfor_to_long_format() %>% #' convert_list_columns_to_character() #' #' # Labelled data #' d <- dplyr::tibble( #' region = labelled_spss( #' c(1, 2, 1, 9, 2, 3), #' c(north = 1, south = 2, center = 3, missing = 9), #' na_values = 9, #' label = "Region of the respondent" #' ), #' sex = labelled( #' c("f", "f", "m", "m", "m", "f"), #' c(female = "f", male = "m"), #' label = "Sex of the respondent" #' ) #' ) #' look_for(d) #' d %>% #' look_for() %>% #' lookfor_to_long_format() %>% #' convert_list_columns_to_character() #' @source Inspired by the `lookfor` command in Stata. #' @export look_for <- function(data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full")) { if (inherits(data, c("survey.design", "svyrep.design"))) { data <- data$variables } if (is.logical(details)) { details <- ifelse(details, "full", "none") } else { details <- match.arg(details) } # applying to_labelled data <- to_labelled(data) # search scope n <- names(data) if (!length(n)) cli::cli_abort("There are no names to search in that object.") # search function keywords <- c(...) l <- unlist(var_label(data)) if (!is.null(keywords)) { look <- function(x) { grep(paste(keywords, collapse = "|"), x, ignore.case = ignore.case) } # names search x <- look(n) variable <- n[x] # variable labels if (length(l) > 0 && labels) { # search labels y <- look(l) variable <- unique(c(variable, names(l[y]))) } if (values) { # search factor levels fl <- lapply(data, levels) y <- look(fl) variable <- unique(c(variable, names(fl[y]))) # search value levels vl <- lapply(data, val_labels) y <- look(vl) variable <- unique(c(variable, names(vl[y]))) } } else { variable <- n } # output if (length(variable)) { pos <- which(n %in% variable) # reordering according to pos # not forgetting that some variables don't have a label if (length(l)) { res <- dplyr::tibble(pos = pos, variable = n[pos], label = l[n[pos]]) } else { res <- dplyr::tibble(pos = pos, variable = n[pos], label = NA_character_) } if (details != "none") { data <- data %>% dplyr::select(dplyr::all_of(res$variable)) n_missing <- function(x) { sum(is.na(x)) } res <- res %>% dplyr::mutate( col_type = unlist(lapply(data, vctrs::vec_ptype_abbr)), missing = unlist(lapply(data, n_missing)), levels = lapply(data, levels), value_labels = lapply(data, val_labels), ) } if (details == "full") { data <- data %>% dplyr::select(dplyr::all_of(res$variable)) unique_values <- function(x) { length(unique(x)) } generic_range <- function(x) { if (all(unlist(lapply(x, is.null)))) { return(NULL) } if (all(is.na(x))) { return(NULL) } r <- suppressWarnings(try(range(x, na.rm = TRUE), silent = TRUE)) if (inherits(r, "try-error")) { return(NULL) } r } res <- res %>% dplyr::mutate( class = lapply(data, class), type = unlist(lapply(data, typeof)), na_values = lapply(data, na_values), na_range = lapply(data, na_range), n_na = missing, # retrocompatibility unique_values = unlist(lapply(data, unique_values)), range = lapply(data, generic_range) ) } } else { res <- dplyr::tibble() } # add a look_for class class(res) <- c("look_for", class(res)) res } #' @rdname look_for #' @export lookfor <- look_for #' @rdname look_for #' @export generate_dictionary <- look_for #' @rdname look_for #' @export print.look_for <- function(x, ...) { if (nrow(x) > 0 && all(c("pos", "variable", "label") %in% names(x))) { x <- x %>% lookfor_to_long_format() %>% convert_list_columns_to_character() %>% dplyr::mutate( # display -- when empty label = dplyr::if_else(is.na(.data$label), "\u2014", .data$label) ) if (all(c("value_labels", "levels", "col_type") %in% names(x))) { if (!"range" %in% names(x)) { x$range <- NA_character_ } x <- x %>% dplyr::mutate( values = dplyr::case_when( !is.na(.data$value_labels) ~ .data$value_labels, !is.na(.data$levels) ~ .data$levels, !is.na(.data$range) ~ paste("range:", .data$range), .default = "" # zero-width space ), variable = dplyr::if_else( duplicated(.data$pos), "", .data$variable ), label = dplyr::if_else(duplicated(.data$pos), "", .data$label), col_type = dplyr::if_else(duplicated(.data$pos), "", .data$col_type), ) if ("missing" %in% names(x)) { x <- x %>% dplyr::mutate( missing = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$missing) ) ) } if ("unique_values" %in% names(x)) { x <- x %>% dplyr::mutate( unique_values = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$unique_values) ) ) } if ("na_values" %in% names(x)) { x <- x %>% dplyr::mutate( na_values = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$na_values) ) ) } if ("na_range" %in% names(x)) { x <- x %>% dplyr::mutate( na_range = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$na_range) ) ) } x <- x %>% dplyr::mutate( pos = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$pos) ) ) %>% dplyr::select( dplyr::any_of(c( "pos", "variable", "label", "col_type", "missing", "unique_values", "values", "na_values", "na_range" )) ) } w <- getOption("width") # available width for printing w_pos <- max(3, stringr::str_length(x$pos)) w_variable <- max(5, stringr::str_length(x$variable)) w_label <- max(5, stringr::str_length(x$label)) # nolint if ("values" %in% names(x)) { w_col_type <- max(8, stringr::str_length(x$col_type)) w_values <- max(5, stringr::str_length(x$values)) # nolint w_missing <- max(7, stringr::str_length(x$missing)) # width for labels lw <- w - 8 - w_pos - w_variable - w_col_type - w_missing lw <- dplyr::case_when( w_values < lw / 2 ~ lw - w_values, w_label < lw / 2 ~ lw - w_label, .default = trunc(lw / 2) ) # a minimum of 10 lw <- max(10, lw) x$label <- stringr::str_trunc(x$label, lw, ellipsis = "~") x$values <- stringr::str_trunc(x$values, lw, ellipsis = "~") } else { lw <- w - 4 - w_pos - w_variable lw <- max(10, lw) x$label <- stringr::str_trunc(x$label, lw, ellipsis = "~") } print.data.frame(x, row.names = FALSE, quote = FALSE, right = FALSE) } else if (nrow(x) == 0) { cli::cli_alert_warning("Nothing found. Sorry.") } else { print(dplyr::as_tibble(x)) } } #' @rdname look_for #' @export look_for_and_select <- function( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE) { lf <- data %>% look_for( ..., labels = labels, values = values, ignore.case = ignore.case, details = "none" ) data %>% dplyr::select(lf$pos) } #' @rdname look_for #' @export convert_list_columns_to_character <- function(x) { if ("range" %in% names(x)) { x <- x %>% dplyr::mutate(range = unlist(lapply(range, paste, collapse = " - "))) } if ("value_labels" %in% names(x) && is.list(x$value_labels)) { x <- x %>% dplyr::mutate(value_labels = names_prefixed_by_values(.data$value_labels)) } x %>% dplyr::as_tibble() %>% # remove look_for class dplyr::mutate( dplyr::across( dplyr::where(is.list), ~ unlist(lapply(.x, paste, collapse = "; ")) ) ) } #' @rdname look_for #' @export lookfor_to_long_format <- function(x) { # only if details are provided if (!"levels" %in% names(x) || !"value_labels" %in% names(x)) { return(x) } x <- x %>% dplyr::as_tibble() %>% # remove look_for class dplyr::mutate(value_labels = names_prefixed_by_values(.data$value_labels)) # tidyr::unnest() fails if all elements are NULL if (all(unlist(lapply(x$levels, is.null)))) { x$levels <- NA_character_ } if (all(unlist(lapply(x$value_labels, is.null)))) { x$value_labels <- NA_character_ } x %>% tidyr::unnest("levels", keep_empty = TRUE) %>% tidyr::unnest("value_labels", keep_empty = TRUE) } labelled/R/labelled-package.R0000644000176200001440000000022514737244525015513 0ustar liggesusers## usethis namespace: start #' @importFrom lifecycle deprecate_soft #' @importFrom dplyr where #' @import rlang ## usethis namespace: end NULL labelled/R/import-standalone-obj-type.R0000644000176200001440000002171014736716451017550 0ustar liggesusers# Standalone file: do not edit by hand # Source: https://github.com/r-lib/rlang/blob/HEAD/R/standalone-obj-type.R # Generated by: usethis::use_standalone("r-lib/rlang", "obj-type") # ---------------------------------------------------------------------- # # --- # repo: r-lib/rlang # file: standalone-obj-type.R # last-updated: 2024-02-14 # license: https://unlicense.org # imports: rlang (>= 1.1.0) # --- # # ## Changelog # # 2024-02-14: # - `obj_type_friendly()` now works for S7 objects. # # 2023-05-01: # - `obj_type_friendly()` now only displays the first class of S3 objects. # # 2023-03-30: # - `stop_input_type()` now handles `I()` input literally in `arg`. # # 2022-10-04: # - `obj_type_friendly(value = TRUE)` now shows numeric scalars # literally. # - `stop_friendly_type()` now takes `show_value`, passed to # `obj_type_friendly()` as the `value` argument. # # 2022-10-03: # - Added `allow_na` and `allow_null` arguments. # - `NULL` is now backticked. # - Better friendly type for infinities and `NaN`. # # 2022-09-16: # - Unprefixed usage of rlang functions with `rlang::` to # avoid onLoad issues when called from rlang (#1482). # # 2022-08-11: # - Prefixed usage of rlang functions with `rlang::`. # # 2022-06-22: # - `friendly_type_of()` is now `obj_type_friendly()`. # - Added `obj_type_oo()`. # # 2021-12-20: # - Added support for scalar values and empty vectors. # - Added `stop_input_type()` # # 2021-06-30: # - Added support for missing arguments. # # 2021-04-19: # - Added support for matrices and arrays (#141). # - Added documentation. # - Added changelog. # # nocov start #' Return English-friendly type #' @param x Any R object. #' @param value Whether to describe the value of `x`. Special values #' like `NA` or `""` are always described. #' @param length Whether to mention the length of vectors and lists. #' @return A string describing the type. Starts with an indefinite #' article, e.g. "an integer vector". #' @noRd obj_type_friendly <- function(x, value = TRUE) { if (is_missing(x)) { return("absent") } if (is.object(x)) { if (inherits(x, "quosure")) { type <- "quosure" } else { type <- class(x)[[1L]] } return(sprintf("a <%s> object", type)) } if (!is_vector(x)) { return(.rlang_as_friendly_type(typeof(x))) } n_dim <- length(dim(x)) if (!n_dim) { if (!is_list(x) && length(x) == 1) { if (is_na(x)) { return(switch( typeof(x), logical = "`NA`", integer = "an integer `NA`", double = if (is.nan(x)) { "`NaN`" } else { "a numeric `NA`" }, complex = "a complex `NA`", character = "a character `NA`", .rlang_stop_unexpected_typeof(x) )) } show_infinites <- function(x) { if (x > 0) { "`Inf`" } else { "`-Inf`" } } str_encode <- function(x, width = 30, ...) { if (nchar(x) > width) { x <- substr(x, 1, width - 3) x <- paste0(x, "...") } encodeString(x, ...) } if (value) { if (is.numeric(x) && is.infinite(x)) { return(show_infinites(x)) } if (is.numeric(x) || is.complex(x)) { number <- as.character(round(x, 2)) what <- if (is.complex(x)) "the complex number" else "the number" return(paste(what, number)) } return(switch( typeof(x), logical = if (x) "`TRUE`" else "`FALSE`", character = { what <- if (nzchar(x)) "the string" else "the empty string" paste(what, str_encode(x, quote = "\"")) }, raw = paste("the raw value", as.character(x)), .rlang_stop_unexpected_typeof(x) )) } return(switch( typeof(x), logical = "a logical value", integer = "an integer", double = if (is.infinite(x)) show_infinites(x) else "a number", complex = "a complex number", character = if (nzchar(x)) "a string" else "\"\"", raw = "a raw value", .rlang_stop_unexpected_typeof(x) )) } if (length(x) == 0) { return(switch( typeof(x), logical = "an empty logical vector", integer = "an empty integer vector", double = "an empty numeric vector", complex = "an empty complex vector", character = "an empty character vector", raw = "an empty raw vector", list = "an empty list", .rlang_stop_unexpected_typeof(x) )) } } vec_type_friendly(x) } vec_type_friendly <- function(x, length = FALSE) { if (!is_vector(x)) { abort("`x` must be a vector.") } type <- typeof(x) n_dim <- length(dim(x)) add_length <- function(type) { if (length && !n_dim) { paste0(type, sprintf(" of length %s", length(x))) } else { type } } if (type == "list") { if (n_dim < 2) { return(add_length("a list")) } else if (is.data.frame(x)) { return("a data frame") } else if (n_dim == 2) { return("a list matrix") } else { return("a list array") } } type <- switch( type, logical = "a logical %s", integer = "an integer %s", numeric = , double = "a double %s", complex = "a complex %s", character = "a character %s", raw = "a raw %s", type = paste0("a ", type, " %s") ) if (n_dim < 2) { kind <- "vector" } else if (n_dim == 2) { kind <- "matrix" } else { kind <- "array" } out <- sprintf(type, kind) if (n_dim >= 2) { out } else { add_length(out) } } .rlang_as_friendly_type <- function(type) { switch( type, list = "a list", NULL = "`NULL`", environment = "an environment", externalptr = "a pointer", weakref = "a weak reference", S4 = "an S4 object", name = , symbol = "a symbol", language = "a call", pairlist = "a pairlist node", expression = "an expression vector", char = "an internal string", promise = "an internal promise", ... = "an internal dots object", any = "an internal `any` object", bytecode = "an internal bytecode object", primitive = , builtin = , special = "a primitive function", closure = "a function", type ) } .rlang_stop_unexpected_typeof <- function(x, call = caller_env()) { abort( sprintf("Unexpected type <%s>.", typeof(x)), call = call ) } #' Return OO type #' @param x Any R object. #' @return One of `"bare"` (for non-OO objects), `"S3"`, `"S4"`, #' `"R6"`, or `"S7"`. #' @noRd obj_type_oo <- function(x) { if (!is.object(x)) { return("bare") } class <- inherits(x, c("R6", "S7_object"), which = TRUE) if (class[[1]]) { "R6" } else if (class[[2]]) { "S7" } else if (isS4(x)) { "S4" } else { "S3" } } #' @param x The object type which does not conform to `what`. Its #' `obj_type_friendly()` is taken and mentioned in the error message. #' @param what The friendly expected type as a string. Can be a #' character vector of expected types, in which case the error #' message mentions all of them in an "or" enumeration. #' @param show_value Passed to `value` argument of `obj_type_friendly()`. #' @param ... Arguments passed to [abort()]. #' @inheritParams args_error_context #' @noRd stop_input_type <- function(x, what, ..., allow_na = FALSE, allow_null = FALSE, show_value = TRUE, arg = caller_arg(x), call = caller_env()) { # From standalone-cli.R cli <- env_get_list( nms = c("format_arg", "format_code"), last = topenv(), default = function(x) sprintf("`%s`", x), inherit = TRUE ) if (allow_na) { what <- c(what, cli$format_code("NA")) } if (allow_null) { what <- c(what, cli$format_code("NULL")) } if (length(what)) { what <- oxford_comma(what) } if (inherits(arg, "AsIs")) { format_arg <- identity } else { format_arg <- cli$format_arg } message <- sprintf( "%s must be %s, not %s.", format_arg(arg), what, obj_type_friendly(x, value = show_value) ) abort(message, ..., call = call, arg = arg) } oxford_comma <- function(chr, sep = ", ", final = "or") { n <- length(chr) if (n < 2) { return(chr) } head <- chr[seq_len(n - 1)] last <- chr[n] head <- paste(head, collapse = sep) # Write a or b. But a, b, or c. if (n > 2) { paste0(head, sep, final, " ", last) } else { paste0(head, " ", final, " ", last) } } # nocov end labelled/R/labelled.R0000644000176200001440000000050614357761455014130 0ustar liggesusers#' @importFrom haven labelled #' @export haven::labelled #' @importFrom haven is.labelled #' @export haven::is.labelled #' @importFrom haven labelled_spss #' @export haven::labelled_spss #' @importFrom haven print_labels #' @export haven::print_labels #' @importFrom dplyr `%>%` #' @export dplyr::`%>%` labelled/R/drop_unused_value_labels.R0000644000176200001440000000132014357761455017424 0ustar liggesusers#' Drop unused value labels #' #' Drop value labels associated to a value not present in the data. #' #' @param x A vector or a data frame. #' @examples #' x <- labelled(c(1, 2, 2, 1), c(yes = 1, no = 2, maybe = 3)) #' x #' drop_unused_value_labels(x) #' @export drop_unused_value_labels <- function(x) { UseMethod("drop_unused_value_labels") } #' @export drop_unused_value_labels.default <- function(x) { # do nothing x } #' @export drop_unused_value_labels.haven_labelled <- function(x) { vl <- val_labels(x) val_labels(x) <- vl[vl %in% unique(x)] x } #' @export drop_unused_value_labels.data.frame <- function(x) { x[] <- lapply(x, drop_unused_value_labels) x } labelled/R/na_values.R0000644000176200001440000002737114737275735014355 0ustar liggesusers#' Get / Set SPSS missing values #' #' @param x A vector (or a data frame). #' @param value A vector of values that should also be considered as missing #' (for `na_values`) or a numeric vector of length two giving the (inclusive) #' extents of the range (for `na_values`, use `-Inf` and `Inf` if you #' want the range to be open ended). #' @details #' See [haven::labelled_spss()] for a presentation of SPSS's user defined #' missing values. #' #' Note that [base::is.na()] will return `TRUE` for user defined missing values. #' It will also return `TRUE` for regular `NA` values. If you want to test if a #' specific value is a user NA but not a regular `NA`, use `is_user_na()`. #' If you want to test if a value is a regular `NA` but not a user NA, not a #' tagged NA, use `is_regular_na()`. #' #' You can use [user_na_to_na()] to convert user defined missing values to #' regular `NA`. Note that any value label attached to a user defined missing #' value will be lost. #' [user_na_to_regular_na()] is a synonym of [user_na_to_na()]. #' #' The method [user_na_to_tagged_na()] will convert user defined missing values #' into [haven::tagged_na()], preserving value labels. Please note that #' [haven::tagged_na()] are defined only for double vectors. Therefore, integer #' `haven_labelled_spss` vectors will be converted into double `haven_labelled` #' vectors; and [user_na_to_tagged_na()] cannot be applied to a character #' `haven_labelled_spss` vector. #' #' [tagged_na_to_user_na()] is the opposite of [user_na_to_tagged_na()] and #' convert tagged `NA` into user defined missing values. #' #' @return #' `na_values()` will return a vector of values that should also be #' considered as missing. #' `na_range()` will return a numeric vector of length two giving the #' (inclusive) extents of the range. #' @seealso [haven::labelled_spss()], [user_na_to_na()] #' @examples #' v <- labelled( #' c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), #' c(yes = 1, no = 3, "don't know" = 9) #' ) #' v #' na_values(v) <- 9 #' na_values(v) #' v #' #' is.na(v) # TRUE for the 6th and 10th values #' is_user_na(v) # TRUE only for the 6th value #' #' user_na_to_na(v) #' na_values(v) <- NULL #' v #' na_range(v) <- c(5, Inf) #' na_range(v) #' v #' user_na_to_na(v) #' user_na_to_tagged_na(v) #' #' # it is not recommended to mix user NAs and tagged NAs #' x <- c(NA, 9, tagged_na("a")) #' na_values(x) <- 9 #' x #' is.na(x) #' is_user_na(x) #' is_tagged_na(x) #' is_regular_na(x) #' #' @export na_values <- function(x) { UseMethod("na_values") } #' @export na_values.default <- function(x) { # return nothing NULL } #' @export na_values.haven_labelled_spss <- function(x) { attr(x, "na_values", exact = TRUE) } #' @export na_values.data.frame <- function(x) { lapply(x, na_values) } #' @rdname na_values #' @export `na_values<-` <- function(x, value) { UseMethod("na_values<-") } #' @export `na_values<-.default` <- function(x, value) { if (!is.null(value)) { x <- labelled_spss( x, val_labels(x), na_values = value, na_range = attr(x, "na_range"), label = var_label(x) ) } # else do nothing x } #' @export `na_values<-.factor` <- function(x, value) { if (!is.null(value)) cli::cli_abort("{.fn na_values}` cannot be applied to factors.") x %>% remove_attributes("na_values") } #' @export `na_values<-.haven_labelled` <- function(x, value) { if (is.null(value)) { attr(x, "na_values") <- NULL if (is.null(attr(x, "na_range"))) { x <- labelled(x, val_labels(x), label = var_label(x)) } } else { x <- labelled_spss( x, val_labels(x), na_values = value, na_range = attr(x, "na_range"), label = var_label(x) ) } x } #' @export `na_values<-.data.frame` <- function(x, value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } if (!all(names(value) %in% names(x))) { missing_names <- setdiff(names(value), names(x)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg x}." )) } for (var in names(value)) { if (!is.null(value[[var]])) { if (mode(x[[var]]) != mode(value[[var]])) cli::cli_abort(paste( "{.arg x} ({class(x)}) and {.arg value} ({class(value)})", "must be same type." )) if (typeof(x[[var]]) != typeof(value[[var]])) { mode(value[[var]]) <- typeof(x[[var]]) } } } for (var in names(value)) na_values(x[[var]]) <- value[[var]] x } #' @rdname na_values #' @export na_range <- function(x) { UseMethod("na_range") } #' @export na_range.default <- function(x) { # return nothing NULL } #' @export na_range.haven_labelled_spss <- function(x) { attr(x, "na_range", exact = TRUE) } #' @export na_range.data.frame <- function(x) { lapply(x, na_range) } #' @rdname na_values #' @export `na_range<-` <- function(x, value) { UseMethod("na_range<-") } #' @export `na_range<-.default` <- function(x, value) { if (!is.null(value)) { x <- labelled_spss( x, val_labels(x), na_values = attr(x, "na_values"), na_range = value, label = var_label(x) ) } # else do nothing x } #' @export `na_range<-.factor` <- function(x, value) { if (!is.null(value)) { cli::cli_abort("{.fn na_range} cannot be applied to factors.") } x %>% remove_attributes("na_range") } #' @export `na_range<-.haven_labelled` <- function(x, value) { if (is.null(value)) { attr(x, "na_range") <- NULL if (is.null(attr(x, "na_values"))) { x <- labelled(x, val_labels(x), label = var_label(x)) } } else { x <- labelled_spss( x, val_labels(x), na_values = attr(x, "na_values"), na_range = value, label = var_label(x) ) } x } #' @export `na_range<-.data.frame` <- function(x, value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } if (!all(names(value) %in% names(x))) { missing_names <- setdiff(names(value), names(x)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg x}." )) } for (var in names(value)) { if (!is.null(value[[var]])) { if (mode(x[[var]]) != mode(value[[var]])) cli::cli_abort(paste( "{.arg x} ({class(x)}) and {.arg value} ({class(value)})", "must be same type." )) if (typeof(x[[var]]) != typeof(value[[var]])) { mode(value[[var]]) <- typeof(x[[var]]) } } } for (var in names(value)) na_range(x[[var]]) <- value[[var]] x } #' @rdname na_values #' @export get_na_values <- na_values #' @rdname na_values #' @export get_na_range <- na_range #' @rdname na_values #' @param .data a data frame or a vector #' @param ... name-value pairs of missing values (see examples) #' @param .values missing values to be applied to the data.frame, #' using the same syntax as `value` in `na_values(df) <- value` or #' `na_range(df) <- value`. #' @param .strict should an error be returned if some labels #' doesn't correspond to a column of `x`? #' @note #' `get_na_values()` is identical to `na_values()` and `get_na_range()` #' to `na_range()`. #' #' `set_na_values()` and `set_na_range()` could be used with \pkg{dplyr} #' syntax. #' @return #' `set_na_values()` and `set_na_range()` will return an updated #' copy of `.data`. #' @examples #' if (require(dplyr)) { #' # setting value label and user NAs #' df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% #' set_value_labels(s2 = c(yes = 1, no = 2)) %>% #' set_na_values(s2 = 9) #' na_values(df) #' #' # removing missing values #' df <- df %>% set_na_values(s2 = NULL) #' df$s2 #' #' # example with a vector #' v <- 1:10 #' v <- v %>% set_na_values(5, 6, 7) #' v #' v %>% set_na_range(8, 10) #' v %>% set_na_range(.values = c(9, 10)) #' v %>% set_na_values(NULL) #' } #' @export set_na_values <- function(.data, ..., .values = NA, .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) cli::cli_abort("{.arg .data} should be a data frame or a vector.") # vector case if (is.atomic(.data)) { if (!identical(.values, NA)) { na_values(.data) <- .values } else { na_values(.data) <- unname(unlist(rlang::dots_list(...))) } return(.data) } # data.frame case if (!identical(.values, NA)) { if (!.strict) { .values <- .values[intersect(names(.values), names(.data))] } na_values(.data) <- .values } values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- setdiff(names(values), names(.data)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg .data}." )) } for (v in intersect(names(values), names(.data))) { na_values(.data[[v]]) <- values[[v]] } .data } #' @rdname na_values #' @export set_na_range <- function(.data, ..., .values = NA, .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) cli::cli_abort("{.arg .data} should be a data frame or a vector.") # vector case if (is.atomic(.data)) { if (!identical(.values, NA)) { na_range(.data) <- .values } else { na_range(.data) <- unname(unlist(rlang::dots_list(...))) } return(.data) } # data.frame case if (!identical(.values, NA)) { if (!.strict) { .values <- .values[intersect(names(.values), names(.data))] } na_range(.data) <- .values } values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- setdiff(names(values), names(.data)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg .data}." )) } for (v in intersect(names(values), names(.data))) { na_range(.data[[v]]) <- values[[v]] } .data } # internal function to test if a value is user_na test_if_user_na <- function(val, na_values = NULL, na_range = NULL) { if (inherits(val, "haven_labelled")) { val <- unclass(val) } miss <- rep.int(FALSE, length(val)) if (!is.null(na_values)) { miss <- miss | val %in% na_values } if (!is.null(na_range) && is.numeric(val)) { miss <- miss | (val >= na_range[1] & val <= na_range[2] & !is.na(val)) } miss } #' @rdname na_values #' @export is_user_na <- function(x) { test_if_user_na(x, na_values(x), na_range(x)) } #' @rdname na_values #' @export is_regular_na <- function(x) { is.na(x) & !is_user_na(x) & !is_tagged_na(x) } #' @rdname na_values #' @export user_na_to_na <- function(x) { UseMethod("user_na_to_na") } #' @rdname na_values #' @export user_na_to_regular_na <- user_na_to_na #' @export user_na_to_na.default <- function(x) { # do nothing x } #' @export user_na_to_na.haven_labelled_spss <- function(x) { remove_user_na(x, user_na_to_na = TRUE) } #' @export user_na_to_na.data.frame <- function(x) { x[] <- lapply(x, user_na_to_na) x } #' @rdname na_values #' @export user_na_to_tagged_na <- function(x) { UseMethod("user_na_to_tagged_na") } #' @export user_na_to_tagged_na.default <- function(x) { # do nothing x } #' @export user_na_to_tagged_na.haven_labelled_spss <- function(x) { remove_user_na(x, user_na_to_tagged_na = TRUE) } #' @export user_na_to_tagged_na.data.frame <- function(x) { x[] <- lapply(x, user_na_to_tagged_na) x } labelled/R/retrocompatibility.R0000644000176200001440000000423314466735327016312 0ustar liggesusers#' Update labelled data to last version #' #' Labelled data imported with \pkg{haven} version 1.1.2 or before or #' created with [haven::labelled()] version 1.1.0 or before was using #' "labelled" and "labelled_spss" classes. #' #' Since version 2.0.0 of these two packages, "haven_labelled" and #' "haven_labelled_spss" are used instead. #' #' Since haven 2.3.0, "haven_labelled" class has been evolving #' using now \pkg{vctrs} package. #' #' `update_labelled()` convert labelled vectors #' from the old to the new classes and to reconstruct all #' labelled vectors with the last version of the package. #' #' @param x An object (vector or data.frame) to convert. #' @seealso [haven::labelled()], [haven::labelled_spss()] #' @export update_labelled <- function(x) { UseMethod("update_labelled") } #' @export update_labelled.default <- function(x) { # return x x } #' @rdname update_labelled #' @export update_labelled.labelled <- function(x) { # update only previous labelled class, but not objects from Hmisc if (!is.null(attr(x, "labels", exact = TRUE))) { if ( is.null(attr(x, "na_values", exact = TRUE)) && is.null(attr(x, "na_range", exact = TRUE)) ) { x <- labelled( x, labels = attr(x, "labels", exact = TRUE), label = attr(x, "label", exact = TRUE) ) } else { x <- labelled_spss( x, na_values = attr(x, "na_values", exact = TRUE), na_range = attr(x, "range", exact = TRUE), labels = attr(x, "labels", exact = TRUE), label = attr(x, "label", exact = TRUE) ) } } x } #' @rdname update_labelled #' @export update_labelled.haven_labelled_spss <- function(x) { labelled_spss( x, labels = val_labels(x), label = var_label(x), na_values = na_values(x), na_range = na_range(x) ) } #' @rdname update_labelled #' @export update_labelled.haven_labelled <- function(x) { labelled( x, labels = val_labels(x), label = var_label(x) ) } #' @rdname update_labelled #' @export update_labelled.data.frame <- function(x) { x[] <- lapply(x, update_labelled) x } labelled/R/recode_if.R0000644000176200001440000000346314736751266014310 0ustar liggesusers#' Recode some values based on condition #' #' @param x vector to be recoded #' @param condition logical vector of same length as `x` #' @param true values to use for `TRUE` values of `condition`. It must be #' either the same length as `x`, or length 1. #' @return Returns `x` with values replaced by `true` when `condition` is #' `TRUE` and unchanged when `condition` is `FALSE` or `NA`. Variable and value #' labels are preserved unchanged. #' @export #' @examples #' v <- labelled(c(1, 2, 2, 9), c(yes = 1, no = 2)) #' v %>% recode_if(v == 9, NA) #' if (require(dplyr)) { #' df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 2, 1)) %>% #' set_value_labels( #' s1 = c(Male = "M", Female = "F"), #' s2 = c(A = 1, B = 2) #' ) %>% #' set_variable_labels(s1 = "Gender", s2 = "Group") #' #' df <- df %>% #' mutate( #' s3 = s2 %>% recode_if(s1 == "F", 2), #' s4 = s2 %>% recode_if(s1 == "M", s2 + 10) #' ) #' df #' df %>% look_for() #' } recode_if <- function(x, condition, true) { check_logical(condition) if (length(x) != length(condition)) cli::cli_abort(paste( "{.arg condition} (length: {length(condition)}) and", "{.arg x} (length: {length(x)}) should have the same length." )) if (length(true) > 1 && length(true) != length(x)) cli::cli_abort( "{.arg true} should be unique or of same length as {.arg x}." ) original_class <- class(x) condition[is.na(condition)] <- FALSE if (length(true) == 1) { x[condition] <- true } else { x[condition] <- true[condition] } if (!identical(class(x), original_class)) cli::cli_warn(paste( "Class of {.arg x} (originally {.field {original_class}}) has changed", "and was coerced to {.field {class(x)}}." )) x } labelled/R/recode.R0000644000176200001440000000665014737244525013627 0ustar liggesusers#' Recode values #' #' Extend [dplyr::recode()] method from \pkg{dplyr} to #' works with labelled vectors. #' #' @importFrom dplyr recode #' @inheritParams dplyr::recode #' @param .keep_value_labels If `TRUE`, keep original value labels. #' If `FALSE`, remove value labels. #' @param .combine_value_labels If `TRUE`, will combine original value labels #' to generate new value labels. Note that unexpected results could be #' obtained if a same old value is recoded into several different new values. #' @param .sep Separator to be used when combining value labels. #' @seealso [dplyr::recode()] #' @examples #' x <- labelled(1:3, c(yes = 1, no = 2)) #' x #' dplyr::recode(x, `3` = 2L) #' #' # do not keep value labels #' dplyr::recode(x, `3` = 2L, .keep_value_labels = FALSE) #' #' # be careful, changes are not of the same type (here integers), #' # NA arecreated #' dplyr::recode(x, `3` = 2) #' #' # except if you provide .default or new values for all old values #' dplyr::recode(x, `1` = 1, `2` = 1, `3` = 2) #' #' # if you change the type of the vector (here transformed into character) #' # value labels are lost #' dplyr::recode(x, `3` = "b", .default = "a") #' #' # use .keep_value_labels = FALSE to avoid a warning #' dplyr::recode(x, `3` = "b", .default = "a", .keep_value_labels = FALSE) #' #' # combine value labels #' x <- labelled( #' 1:4, #' c( #' "strongly agree" = 1, #' "agree" = 2, #' "disagree" = 3, #' "strongly disagree" = 4 #' ) #' ) #' dplyr::recode( #' x, #' `1` = 1L, #' `2` = 1L, #' `3` = 2L, #' `4` = 2L, #' .combine_value_labels = TRUE #' ) #' dplyr::recode( #' x, #' `2` = 1L, #' `4` = 3L, #' .combine_value_labels = TRUE #' ) #' dplyr::recode( #' x, #' `2` = 1L, #' `4` = 3L, #' .combine_value_labels = TRUE, #' .sep = " or " #' ) #' dplyr::recode( #' x, #' `2` = 1L, #' .default = 2L, #' .combine_value_labels = TRUE #' ) #' #' # example when combining some values without a label #' y <- labelled(1:4, c("strongly agree" = 1)) #' dplyr::recode(y, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) #' @export recode.haven_labelled <- function( .x, ..., .default = NULL, .missing = NULL, .keep_value_labels = TRUE, .combine_value_labels = FALSE, .sep = " / ") { ret <- dplyr::recode( .x = unclass(.x), ..., .default = .default, .missing = .missing ) if (mode(.x) == mode(ret)) { if (.keep_value_labels) { ret <- copy_labels(.x, ret) } if (.combine_value_labels) { ret <- copy_labels(.x, ret) old_vals <- unique(.x) %>% stats::na.omit() new_vals <- c() for (o in old_vals) { new_vals <- c(new_vals, ret[!is.na(.x) & .x == o][1]) } original_labels <- val_labels(.x) for (v in unique(new_vals)) { combined_label <- names( original_labels[original_labels %in% old_vals[new_vals == v]] ) if (length(combined_label) > 0) { val_label(ret, v) <- paste(combined_label, collapse = .sep) } } ret <- drop_unused_value_labels(ret) } } else { var_label(ret) <- var_label(.x) if (.keep_value_labels || .combine_value_labels) { cli::cli_warn(paste( "The type of {.arg .x} ({mode(ret)}) has been changed", "and value labels have been lost." )) } } ret } labelled/R/update_with.R0000644000176200001440000000625414735465412014701 0ustar liggesusers#' Update variable/value labels with a function #' @param .data A data frame, or data frame extension (e.g. a tibble) #' @param .fn A function used to transform the variable/value labels of the #' selected `.cols`. #' @param .cols Columns to update; defaults to all columns. Use tidy selection. #' @param ... additional arguments passed onto `.fn`. #' @details #' For `update_variable_labels_with()`, it is possible to access the name of #' the variable inside `.fn` by using `names()`, i.e. `.fn` receive a named #' character vector (see example). `.fn` can return `as.character(NA)` to #' remove a variable label. #' @examples #' df <- iris %>% #' set_variable_labels( #' Sepal.Length = "Length of sepal", #' Sepal.Width = "Width of sepal", #' Petal.Length = "Length of petal", #' Petal.Width = "Width of petal", #' Species = "Species" #' ) #' df$Species <- to_labelled(df$Species) #' df %>% look_for() #' df %>% #' update_variable_labels_with(toupper) %>% #' look_for() #' #' # accessing variable names with names() #' df %>% #' update_variable_labels_with(function(x){tolower(names(x))}) %>% #' look_for() #' #' df %>% #' update_variable_labels_with(toupper, .cols = dplyr::starts_with("S")) %>% #' look_for() #' @export update_variable_labels_with <- function(.data, .fn, .cols = dplyr::everything(), ...) { UseMethod("update_variable_labels_with") } #' @export update_variable_labels_with.data.frame <- function(.data, .fn, .cols = dplyr::everything(), ...) { .fn <- rlang::as_function(.fn) cols <- tidyselect::eval_select( rlang::enquo(.cols), .data, allow_rename = FALSE ) vl <- var_label(.data, null_action = "na") vl <- vl[names(cols)] vl <- mapply( function(variable, label) { stats::setNames(label, variable) }, names(vl), vl, SIMPLIFY = FALSE ) vl <- lapply(vl, .fn, ...) vl <- lapply(vl, unname) var_label(.data) <- vl .data } #' @export #' @rdname update_variable_labels_with #' @examples #' df %>% #' update_value_labels_with(toupper) %>% #' look_for() update_value_labels_with <- function(.data, .fn, .cols = dplyr::everything(), ...) { UseMethod("update_value_labels_with") } #' @export update_value_labels_with.data.frame <- function(.data, .fn, .cols = dplyr::everything(), ...) { .fn <- rlang::as_function(.fn) cols <- tidyselect::eval_select( rlang::enquo(.cols), .data, allow_rename = FALSE ) for (i in cols) { vl <- val_labels(.data[[i]]) if (!is.null(vl)) { names(vl) <- .fn(names(vl), ...) val_labels(.data[[i]]) <- vl } } .data } labelled/R/val_labels.R0000644000176200001440000003715514737244525014476 0ustar liggesusers#' Get / Set value labels #' #' @param x A vector or a data.frame #' @param prefixed Should labels be prefixed with values? #' @param v A single value. #' @param value A named vector for `val_labels()` (see [haven::labelled()]) or #' a character string for `val_label()`. `NULL` to remove the labels (except #' if `null_action = "labelled"`). #' For data frames, it could also be a named list with a vector of value #' labels per variable. #' @param null_action,.null_action for advanced users, if `value = NULL`, #' unclass the vector (default) or force/keep `haven_labelled` class #' (if `null_action = "labelled"`) #' @return #' `val_labels()` will return a named vector. #' `val_label()` will return a single character string. #' @examples #' v <- labelled( #' c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), #' c(yes = 1, no = 3, "don't know" = 9) #' ) #' val_labels(v) #' val_labels(v, prefixed = TRUE) #' val_label(v, 2) #' val_label(v, 2) <- "maybe" #' v #' val_label(v, 9) <- NULL #' v #' val_labels(v, null_action = "labelled") <- NULL #' v #' val_labels(v) <- NULL #' v #' @export val_labels <- function(x, prefixed = FALSE) { UseMethod("val_labels") } #' @export val_labels.default <- function(x, prefixed = FALSE) { # return nothing NULL } #' @export val_labels.haven_labelled <- function(x, prefixed = FALSE) { labels <- attr(x, "labels", exact = TRUE) if (prefixed) { names(labels) <- names_prefixed_by_values(labels) } labels } #' @export val_labels.data.frame <- function(x, prefixed = FALSE) { lapply(x, val_labels, prefixed = prefixed) } #' @rdname val_labels #' @export `val_labels<-` <- function( x, null_action = c("unclass", "labelled"), value) { UseMethod("val_labels<-") } #' @export `val_labels<-.default` <- function( x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if (!is.null(value) || null_action == "labelled") { x <- labelled(x, value, label = var_label(x)) } # otherwise do nothing x } #' @export `val_labels<-.factor` <- function(x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if (!is.null(value) || null_action == "labelled") cli::cli_abort("Value labels cannot be applied to factors.") x %>% remove_attributes("labels") } #' @export `val_labels<-.numeric` <- function( x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if ((!is.null(value) && length(value) > 0) || null_action == "labelled") { x <- labelled(x, value, label = var_label(x)) } x } #' @export `val_labels<-.character` <- function( x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if ((!is.null(value) && length(value) > 0) || null_action == "labelled") { x <- labelled(x, value, label = var_label(x)) } x } #' @export `val_labels<-.haven_labelled` <- function( x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if (length(value) == 0) { value <- NULL } if (is.null(value) && null_action == "unclass") { x <- unclass(x) attr(x, "labels") <- NULL } else { x <- labelled(x, value, label = var_label(x)) } x } #' @export `val_labels<-.haven_labelled_spss` <- function( x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if (length(value) == 0) { value <- NULL } if ( is.null(value) && is.null(attr(x, "na_values")) && is.null(attr(x, "na_range")) && null_action == "unclass" ) { x <- unclass(x) attr(x, "labels") <- NULL } else { x <- labelled_spss( x, value, na_values = attr(x, "na_values"), na_range = attr(x, "na_range"), label = var_label(x) ) } x } #' @export `val_labels<-.data.frame` <- function( x, null_action = c("unclass", "labelled"), value) { null_action <- match.arg(null_action) if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } if (!all(names(value) %in% names(x))) { missing_names <- setdiff(names(value), names(x)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg x}." )) } for (var in names(value)) { if (!is.null(value[[var]])) { if (mode(x[[var]]) != mode(value[[var]])) cli::cli_abort(paste( "{.arg x} ({class(x)}) and {.arg value} ({class(value)})", "must be same type." )) if (typeof(x[[var]]) != typeof(value[[var]])) { mode(value[[var]]) <- typeof(x[[var]]) } } } for (var in names(value)) { val_labels(x[[var]], null_action = null_action) <- value[[var]] } x } #' @rdname val_labels #' @export val_label <- function(x, v, prefixed = FALSE) { UseMethod("val_label") } #' @export val_label.default <- function(x, v, prefixed = FALSE) { if (length(v) != 1) cli::cli_abort("{.arg v} (length: {length(v)}) should be a single value.") # return nothing NULL } #' @export val_label.haven_labelled <- function(x, v, prefixed = FALSE) { if (length(v) != 1) cli::cli_abort("{.arg v} (length: {length(v)}) should be a single value.") labels <- val_labels(x, prefixed = prefixed) if (v %in% labels) { names(labels)[labels == v] } else { NULL } } #' @export val_label.data.frame <- function(x, v, prefixed = FALSE) { lapply(x, val_label, v = v, prefixed = prefixed) } #' @rdname val_labels #' @export `val_label<-` <- function(x, v, null_action = c("unclass", "labelled"), value) { UseMethod("val_label<-") } #' @export `val_label<-.default` <- function( x, v, null_action = c("unclass", "labelled"), value) { if (length(v) != 1) { cli::cli_abort("{.arg v} (length: {length(v)}) should be a single value.") } check_string(value, allow_null = TRUE) names(value) <- v val_labels(x, null_action = null_action) <- value x } #' @export `val_label<-.haven_labelled` <- function( x, v, null_action = c("unclass", "labelled"), value) { if (length(v) != 1) { cli::cli_abort("{.arg v} (length: {length(v)}) should be a single value.") } check_string(value, allow_null = TRUE) labels <- val_labels(x) if (is.null(value)) { if (v %in% labels) { labels <- labels[labels != v] } } else { if (v %in% labels) { names(labels)[labels == v] <- value } else { names(v) <- value labels <- c(labels, v) } } if (length(labels) == 0) { labels <- NULL } val_labels(x, null_action = null_action) <- labels x } #' @export `val_label<-.numeric` <- `val_label<-.haven_labelled` #' @export `val_label<-.character` <- `val_label<-.haven_labelled` #' @export `val_label<-.data.frame` <- function( x, v, null_action = c("unclass", "labelled"), value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } value <- value[names(value) %in% names(x)] for (var in names(value)[]) { check_string(value[[var]], allow_null = TRUE, arg = "value") } for (var in names(value)) { val_label(x[[var]], v, null_action = null_action) <- value[[var]] } x } #' @rdname val_labels #' @export get_value_labels <- val_labels #' @rdname val_labels #' @param .data a data frame or a vector #' @param ... name-value pairs of value labels (see examples) #' @param .labels value labels to be applied to the data.frame, #' using the same syntax as `value` in `val_labels(df) <- value`. #' @param .strict should an error be returned if some labels #' doesn't correspond to a column of `x`? #' @note #' `get_value_labels()` is identical to `val_labels()`. #' #' `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` #' could be used with \pkg{dplyr} syntax. #' While `set_value_labels()` will replace the list of value labels, #' `add_value_labels()` and `remove_value_labels()` will update that list #' (see examples). #' #' `set_value_labels()` could also be applied to a vector / a data.frame column. #' In such case, you can provide a vector of value labels using `.labels` or #' several name-value pairs of value labels (see example). #' Similarly, `add_value_labels()` and `remove_value_labels()` could also be #' applied on vectors. #' @return #' `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` will #' return an updated copy of `.data`. #' @examples #' if (require(dplyr)) { #' # setting value labels #' df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% #' set_value_labels( #' s1 = c(Male = "M", Female = "F"), #' s2 = c(Yes = 1, No = 2) #' ) #' val_labels(df) #' #' # updating value labels #' df <- df %>% add_value_labels(s2 = c(Unknown = 9)) #' df$s2 #' #' # removing a value labels #' df <- df %>% remove_value_labels(s2 = 9) #' df$s2 #' #' # removing all value labels #' df <- df %>% set_value_labels(s2 = NULL) #' df$s2 #' #' # example on a vector #' v <- 1:4 #' v <- set_value_labels(v, min = 1, max = 4) #' v #' v %>% set_value_labels(middle = 3) #' v %>% set_value_labels(NULL) #' v %>% set_value_labels(.labels = c(a = 1, b = 2, c = 3, d = 4)) #' v %>% add_value_labels(between = 2) #' v %>% remove_value_labels(4) #' } #' @export set_value_labels <- function( .data, ..., .labels = NA, .strict = TRUE, .null_action = c("unclass", "labelled")) { .null_action <- match.arg(.null_action) if (!is.data.frame(.data) && !is.atomic(.data)) cli::cli_abort("{.arg .data} should be a data frame or a vector.") # vector case if (is.atomic(.data)) { if (!identical(.labels, NA)) { val_labels(.data, null_action = .null_action) <- .labels } else { values <- unlist(rlang::dots_list(...)) val_labels(.data, null_action = .null_action) <- values } return(.data) } # data.frame case if (!identical(.labels, NA)) { if (!.strict) { .labels <- .labels[intersect(names(.labels), names(.data))] } val_labels(.data, null_action = .null_action) <- .labels } values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- setdiff(names(values), names(.data)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg .data}." )) } for (v in intersect(names(values), names(.data))) { val_labels(.data[[v]], null_action = .null_action) <- values[[v]] } .data } #' @rdname val_labels #' @export add_value_labels <- function( .data, ..., .strict = TRUE, .null_action = c("unclass", "labelled")) { .null_action <- match.arg(.null_action) if (!is.data.frame(.data) && !is.atomic(.data)) cli::cli_abort("{.arg .data} should be a data frame or a vector.") # vector case if (is.atomic(.data)) { values <- unlist(rlang::dots_list(...)) if (is.null(names(values)) || any(names(values) == "")) cli::cli_abort("All arguments should be named.") for (v in names(values)) { val_label(.data, values[[v]], null_action = .null_action) <- v } return(.data) } # data.frame case values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- setdiff(names(values), names(.data)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg .data}." )) } for (v in values) { if (is.null(names(v)) || any(names(v) == "")) cli::cli_abort("All arguments should be named vectors.") } for (v in intersect(names(values), names(.data))) { for (l in names(values[[v]])) { val_label(.data[[v]], values[[v]][[l]], null_action = .null_action) <- l } } .data } #' @rdname val_labels #' @export remove_value_labels <- function( .data, ..., .strict = TRUE, .null_action = c("unclass", "labelled")) { .null_action <- match.arg(.null_action) if (!is.data.frame(.data) && !is.atomic(.data)) cli::cli_abort("{.arg .data} should be a data frame or a vector.") # vector case if (is.atomic(.data)) { values <- unlist(rlang::dots_list(...)) for (v in values) { val_label(.data, v, null_action = .null_action) <- NULL } return(.data) } # data.frame case values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- setdiff(names(values), names(.data)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg .data}." )) } for (v in intersect(names(values), names(.data))) { for (l in values[[v]]) { val_label(.data[[v]], l, null_action = .null_action) <- NULL } } .data } #' Sort value labels #' #' Sort value labels according to values or to labels #' #' @param x A labelled vector or a data.frame #' @param according_to According to values or to labels? #' @param decreasing In decreasing order? #' @examples #' v <- labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3)) #' v #' sort_val_labels(v) #' sort_val_labels(v, decreasing = TRUE) #' sort_val_labels(v, "l") #' sort_val_labels(v, "l", TRUE) #' @export sort_val_labels <- function( x, according_to = c("values", "labels"), decreasing = FALSE) { UseMethod("sort_val_labels") } #' @export sort_val_labels.default <- function( x, according_to = c("values", "labels"), decreasing = FALSE) { # do nothing x } #' @export sort_val_labels.haven_labelled <- function(x, according_to = c("values", "labels"), decreasing = FALSE) { according_to <- match.arg(according_to) labels <- val_labels(x) if (!is.null(labels)) { if (according_to == "values") labels <- sort_tagged_na(labels, decreasing = decreasing) if (according_to == "labels") labels <- labels[order(names(labels), decreasing = decreasing)] val_labels(x) <- labels } x } #' @export sort_val_labels.data.frame <- function( x, according_to = c("values", "labels"), decreasing = FALSE) { x[] <- lapply(x, sort_val_labels, according_to = according_to, decreasing = decreasing ) x } #' Turn a named vector into a vector of names prefixed by values #' @param x vector to be prefixed #' @examples #' df <- dplyr::tibble( #' c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")), #' c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2)) #' ) #' val_labels(df$c1) #' val_labels(df$c1) %>% names_prefixed_by_values() #' val_labels(df) #' val_labels(df) %>% names_prefixed_by_values() #' @export names_prefixed_by_values <- function(x) { UseMethod("names_prefixed_by_values") } #' @export names_prefixed_by_values.default <- function(x) { if (is.null(x)) { return(NULL) } res <- as.character(x) if (is.double(x)) { res[is_tagged_na(x)] <- format_tagged_na(x[is_tagged_na(x)]) } res <- paste0("[", res, "] ", names(x)) names(res) <- names(x) res } #' @export names_prefixed_by_values.list <- function(x) { lapply(x, names_prefixed_by_values) } labelled/R/var_label.R0000644000176200001440000002076114737244525014314 0ustar liggesusers#' Get / Set a variable label #' #' @param x a vector or a data.frame #' @param value a character string or `NULL` to remove the label #' For data frames, with `var_label()`, it could also be a named list or a #' character vector of same length as the number of columns in `x`. #' @param unlist for data frames, return a named vector instead of a list #' @param null_action for data frames, by default `NULL` will be returned for #' columns with no variable label. Use `"fill"` to populate with the column name #' instead, `"skip"` to remove such values from the returned list, `"na"` to #' populate with `NA` or `"empty"` to populate with an empty string (`""`). #' @param recurse if `TRUE`, will apply `var_label()` on packed columns #' (see [tidyr::pack()]) to return the variable labels of each sub-column; #' otherwise, the label of the group of columns will be returned. #' @details #' `get_variable_labels()` is identical to `var_label()`. #' #' For data frames, if you are using `var_label()<-` and if `value` is a #' named list, only elements whose name will match a column of the data frame #' will be taken into account. If `value` is a character vector, labels should #' be in the same order as the columns of the data.frame. #' #' If you are using `label_attribute()<-` or `set_label_attribute()` on a data #' frame, the label attribute will be attached to the data frame itself, not #' to a column of the data frame. #' #' If you are using packed columns (see [tidyr::pack()]), please read the #' dedicated vignette. #' @examples #' var_label(iris$Sepal.Length) #' var_label(iris$Sepal.Length) <- "Length of the sepal" #' \dontrun{ #' View(iris) #' } #' # To remove a variable label #' var_label(iris$Sepal.Length) <- NULL #' # To change several variable labels at once #' var_label(iris) <- c( #' "sepal length", "sepal width", "petal length", #' "petal width", "species" #' ) #' var_label(iris) #' var_label(iris) <- list( #' Petal.Width = "width of the petal", #' Petal.Length = "length of the petal", #' Sepal.Width = NULL, #' Sepal.Length = NULL #' ) #' var_label(iris) #' var_label(iris, null_action = "fill") #' var_label(iris, null_action = "skip") #' var_label(iris, unlist = TRUE) #' #' # #' @export var_label <- function(x, ...) { rlang::check_dots_used() UseMethod("var_label") } var_label_no_check <- function(x, ...) { UseMethod("var_label") } #' @export var_label.default <- function(x, ...) { attr(x, "label", exact = TRUE) } #' @rdname var_label #' @export var_label.data.frame <- function(x, unlist = FALSE, null_action = c("keep", "fill", "skip", "na", "empty"), recurse = FALSE, ...) { if (recurse) { r <- lapply( x, var_label_no_check, unlist = unlist, null_action = null_action, recurse = TRUE ) } else { r <- lapply(x, label_attribute) } null_action <- match.arg(null_action) if (null_action == "fill") { r <- mapply( function(l, n) { if (is.null(l)) n else l }, r, names(r), SIMPLIFY = FALSE ) } if (null_action == "empty") { r <- lapply( r, function(x) { if (is.null(x)) "" else x } ) } if (null_action == "na") { r <- lapply( r, function(x) { if (is.null(x)) NA_character_ else x } ) } if (null_action == "skip") { r <- r[!sapply(r, is.null)] } if (unlist) { r <- lapply( r, function(x) { if (is.null(x)) "" else x } ) r <- base::unlist(r, use.names = TRUE) } r } #' @rdname var_label #' @export `var_label<-` <- function(x, value) { UseMethod("var_label<-") } #' @export `var_label<-.default` <- function(x, value) { label_attribute(x) <- value x } #' @export `var_label<-.data.frame` <- function(x, value) { if ( (!is.character(value) && !is.null(value)) && !is.list(value) || (is.character(value) && length(value) > 1 && length(value) != ncol(x)) ) cli::cli_abort(paste( "{.arg value} should be a named list, NULL, a single character string", "or a character vector of same length than the number of columns", "in {.arg x}." )) if (is.character(value) && length(value) == 1) { value <- as.list(rep(value, ncol(x))) names(value) <- names(x) } if (is.character(value) && length(value) == ncol(x)) { value <- as.list(value) names(value) <- names(x) } if (is.null(value)) { value <- as.list(rep(1, ncol(x))) names(value) <- names(x) value <- lapply(value, function(x) { x <- NULL }) } if (!all(names(value) %in% names(x))) { missing_names <- setdiff(names(value), names(x)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg x}." )) } value <- value[names(value) %in% names(x)] for (var in names(value)) label_attribute(x[[var]]) <- value[[var]] x } #' @rdname var_label #' @export get_variable_labels <- var_label #' @rdname var_label #' @param .data a data frame or a vector #' @param ... name-value pairs of variable labels (see examples) #' @param .labels variable labels to be applied to the data.frame, #' using the same syntax as `value` in `var_label(df) <- value`. #' @param .strict should an error be returned if some labels #' doesn't correspond to a column of `x`? #' @note #' `set_variable_labels()` could be used with \pkg{dplyr} syntax. #' @return #' `set_variable_labels()` will return an updated copy of `.data`. #' @examples #' if (require(dplyr)) { #' # adding some variable labels #' df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% #' set_variable_labels(s1 = "Sex", s2 = "Yes or No?") #' var_label(df) #' #' # removing a variable label #' df <- df %>% set_variable_labels(s2 = NULL) #' var_label(df$s2) #' #' # Set labels from dictionary, e.g. as read from external file #' # One description is missing, one has no match #' description <- tibble( #' name = c( #' "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", #' "Something" #' ), #' label = c( #' "Sepal length", "Sepal width", "Petal length", "Petal width", #' "something" #' ) #' ) #' var_labels <- stats::setNames(as.list(description$label), description$name) #' iris_labelled <- iris %>% #' set_variable_labels(.labels = var_labels, .strict = FALSE) #' var_label(iris_labelled) #' #' # defining variable labels derived from variable names #' if (require(snakecase)) { #' iris <- iris %>% #' set_variable_labels(.labels = to_sentence_case(names(iris))) #' var_label(iris) #' } #' #' # example with a vector #' v <- 1:5 #' v <- v %>% set_variable_labels("a variable label") #' v #' v %>% set_variable_labels(NULL) #' } #' @export set_variable_labels <- function(.data, ..., .labels = NA, .strict = TRUE) { # not a data.frame if (!is.data.frame(.data)) { if (!identical(.labels, NA)) { label_attribute(.data) <- .labels } else { label_attribute(.data) <- unname(unlist(rlang::dots_list(...))) } return(.data) } # data.frame case if (!identical(.labels, NA)) { if (!.strict) { .labels <- .labels[intersect(names(.labels), names(.data))] } else { check_character(.labels) } var_label(.data) <- .labels } values <- rlang::dots_list(...) if (length(values) > 0) { if (.strict && !all(names(values) %in% names(.data))) { missing_names <- setdiff(names(values), names(.data)) cli::cli_abort(c( "Can't find variables {.var {missing_names}} in {.arg .data}." )) } for (v in intersect(names(values), names(.data))) { label_attribute(.data[[v]]) <- values[[v]] } } .data } #' @rdname var_label #' @export label_attribute <- function(x) { attr(x, "label", exact = TRUE) } #' @rdname var_label #' @export get_label_attribute <- function(x) { label_attribute(x) } #' @rdname var_label #' @export set_label_attribute <- function(x, value) { check_string(value, allow_null = TRUE, allow_na = TRUE) attr(x, "label") <- value x } #' @rdname var_label #' @export `label_attribute<-` <- set_label_attribute labelled/R/remove_attributes.R0000644000176200001440000000173514466735327016134 0ustar liggesusers#' Remove attributes #' #' This function removes specified attributes. When applied to a data.frame, it #' will also remove recursively the specified attributes to each column of the #' data.frame. #' #' @param x an object #' @param attributes a character vector indicating attributes to remove #' @export #' @examples #' \dontrun{ #' library(haven) #' path <- system.file("examples", "iris.sav", package = "haven") #' d <- read_sav(path) #' str(d) #' d <- remove_attributes(d, "format.spss") #' str(d) #' } remove_attributes <- function(x, attributes) { UseMethod("remove_attributes") } #' @export remove_attributes.default <- function(x, attributes) { for (a in attributes) attr(x, a) <- NULL x } #' @export remove_attributes.data.frame <- function(x, attributes) { cl <- class(x) x <- remove_attributes.default(x, attributes) x <- dplyr::as_tibble( lapply(x, remove_attributes, attributes = attributes) ) class(x) <- cl x } labelled/R/data.R0000644000176200001440000000052014357761455013271 0ustar liggesusers#' Datasets for testing #' #' These datasets are used to test compatibility with foreign (spss_foreign), #' or haven_2.0 (x_haven_2.0, x_spss_haven_2.0) packages #' @rdname test_datasets "x_haven_2.0" #' @rdname test_datasets "x_spss_haven_2.0" #' @rdname test_datasets "spss_file" #' @rdname test_datasets "dta_file" labelled/R/to_labelled.R0000644000176200001440000002134014737244525014625 0ustar liggesusers#' Convert to labelled data #' #' Convert a factor or data imported with \pkg{foreign} or \pkg{memisc} to #' labelled data. #' #' @param x Factor or dataset to convert to labelled data frame #' @param ... Not used #' @details #' `to_labelled()` is a general wrapper calling the appropriate sub-functions. #' #' `memisc_to_labelled()` converts a `memisc::data.set()`]` object created with #' \pkg{memisc} package to a labelled data frame. #' #' `foreign_to_labelled()` converts data imported with [foreign::read.spss()] #' or [foreign::read.dta()] from \pkg{foreign} package to a labelled data frame, #' i.e. using [haven::labelled()]. #' Factors will not be converted. Therefore, you should use #' `use.value.labels = FALSE` when importing with [foreign::read.spss()] or #' `convert.factors = FALSE` when importing with [foreign::read.dta()]. #' #' To convert correctly defined missing values imported with #' [foreign::read.spss()], you should have used `to.data.frame = FALSE` and #' `use.missings = FALSE`. If you used the option `to.data.frame = TRUE`, #' meta data describing missing values will not be attached to the import. #' If you used `use.missings = TRUE`, missing values would have been converted #' to `NA`. #' #' So far, missing values defined in **Stata** are always imported as `NA` by #' [foreign::read.dta()] and could not be retrieved by `foreign_to_labelled()`. #' #' @return A tbl data frame or a labelled vector. #' @seealso [haven::labelled()], [foreign::read.spss()], #' [foreign::read.dta()], `memisc::data.set()`, #' `memisc::importer`, [to_factor()]. #' #' @examples #' \dontrun{ #' # from foreign #' library(foreign) #' sav <- system.file("files", "electric.sav", package = "foreign") #' df <- to_labelled(read.spss( #' sav, #' to.data.frame = FALSE, #' use.value.labels = FALSE, #' use.missings = FALSE #' )) #' #' # from memisc #' library(memisc) #' nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") #' nes1948 <- spss.portable.file(nes1948.por) #' ds <- as.data.set(nes1948) #' df <- to_labelled(ds) #' } #' #' @export to_labelled <- function(x, ...) { UseMethod("to_labelled") } #' @rdname to_labelled #' @export to_labelled.data.frame <- function(x, ...) { foreign_to_labelled(x) } #' @rdname to_labelled #' @export to_labelled.list <- function(x, ...) { foreign_to_labelled(x) } #' @rdname to_labelled #' @export to_labelled.data.set <- function(x, ...) { memisc_to_labelled(x) } #' @rdname to_labelled #' @export to_labelled.importer <- function(x, ...) { memisc_to_labelled(memisc::as.data.set(x)) } #' @rdname to_labelled #' @export foreign_to_labelled <- function(x) { # note: attr(* , 'missings') and attr(*, 'variable.labels') # are lost when applying as.data.frame (if # read.spss(to.data.frame = F)) variable.labels <- attr(x, "variable.labels", exact = TRUE) # read.spss var.labels <- attr(x, "var.labels", exact = TRUE) # read.dta label.table <- attr(x, "label.table", exact = TRUE) # read.dta missings <- attr(x, "missings", exact = TRUE) # read.spss # if imported with read.spss(to.data.frame=FALSE) it's a # list, not a df if (!is.data.frame(x)) { x <- dplyr::as_tibble(x) } # variable labels (read.spss) if (!is.null(variable.labels)) { var_label(x) <- as.list(variable.labels) } # variable labels (read.dta) if (!is.null(var.labels)) { names(var.labels) <- names(x) var_label(x) <- as.list(var.labels) } # value labels (read.spss) for (var in names(x)) { if (!is.null(attr(x[[var]], "value.labels", exact = TRUE))) { val_labels(x[[var]]) <- attr(x[[var]], "value.labels", exact = TRUE ) } attr(x[[var]], "value.labels") <- NULL } # value labels (read.dta) if (!is.null(label.table)) { # taking into account only variables existing in x val_labels(x) <- label.table[intersect(names(label.table), names(x))] } # missing values (read.spss) for (var in names(missings)) { if (missings[[var]]$type %in% c("one", "two", "three")) { na_values(x[[var]]) <- missings[[var]]$value } if (missings[[var]]$type %in% c("range", "range+1")) { na_range(x[[var]]) <- missings[[var]]$value[1:2] } if (missings[[var]]$type == "range+1") { na_values(x[[var]]) <- missings[[var]]$value[3] } } # cleaning read.spss attr(x, "variable.labels") <- NULL attr(x, "missings") <- NULL # cleaning read.dta attr(x, "datalabel") <- NULL attr(x, "time.stamp") <- NULL attr(x, "formats") <- NULL attr(x, "types") <- NULL attr(x, "val.labels") <- NULL attr(x, "var.labels") <- NULL attr(x, "version") <- NULL attr(x, "label.table") <- NULL attr(x, "missing") <- NULL # to tbl_df (if no other class already specified) if (length(class(x)) == 1) { class(x) <- c("tbl_df", "tbl", "data.frame") } x } #' @rdname to_labelled #' @export memisc_to_labelled <- function(x) { if (!inherits(x, "data.set")) { return(x) } rlang::check_installed("memisc", "to convert a data.set.") df <- as.data.frame(x) for (var in names(x)) { if (length(memisc::description(x[[var]])) > 0) { var_label(df[[var]]) <- as.character(memisc::description(x[[var]])) } if (length(memisc::labels(x[[var]])) > 0) { labs <- memisc::labels(x[[var]])@values names(labs) <- memisc::labels(x[[var]])@.Data val_labels(df[[var]]) <- labs } if ( !is.null(memisc::missing.values(x[[var]])) && length(memisc::missing.values(x[[var]])@filter) > 0 ) { na_values(df[[var]]) <- memisc::missing.values(x[[var]])@filter } if ( !is.null(memisc::missing.values(x[[var]])) && length(memisc::missing.values(x[[var]])@range) > 0 ) { na_range(df[[var]]) <- memisc::missing.values(x[[var]])@range } } dplyr::as_tibble(df) } #' @rdname to_labelled #' @param labels When converting a factor only: #' an optional named vector indicating how factor levels should be coded. #' If a factor level is not found in `labels`, it will be converted to `NA`. #' @param .quiet do not display warnings for prefixed factors with duplicated #' codes #' @details #' If you convert a labelled vector into a factor with prefix, i.e. by using #' [`to_factor(levels = "prefixed")`][to_factor()], `to_labelled.factor()` is #' able to reconvert it to a labelled vector with same values and labels. #' @export #' @examples #' # Converting factors to labelled vectors #' f <- factor( #' c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") #' ) #' to_labelled(f) #' to_labelled(f, c("yes" = 1, "no" = 2, "don't know" = 9)) #' to_labelled(f, c("yes" = 1, "no" = 2)) #' to_labelled(f, c("yes" = "Y", "no" = "N", "don't know" = "DK")) #' #' s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F")) #' labels <- val_labels(s1) #' f1 <- to_factor(s1) #' f1 #' #' to_labelled(f1) #' identical(s1, to_labelled(f1)) #' to_labelled(f1, labels) #' identical(s1, to_labelled(f1, labels)) #' #' l <- labelled( #' c(1, 1, 2, 2, 9, 2, 1, 9), #' c("yes" = 1, "no" = 2, "don't know" = 9) #' ) #' f <- to_factor(l, levels = "p") #' f #' to_labelled(f) #' identical(to_labelled(f), l) to_labelled.factor <- function(x, labels = NULL, .quiet = FALSE, ...) { vl <- var_label(x) if (is.null(labels)) { # check if levels are formatted as "[code] label" l <- .get_prefixes.factor(x) if (anyNA(l$code) || anyNA(l$code) || any(duplicated(l$code))) { if ( !.quiet && any(duplicated(l$code)) && !anyNA(l$code) && !anyNA(l$code) ) { cli::cli_warn("{.arg x} looks prefixed, but duplicated codes found.") } # normal case labs <- seq_along(levels(x)) names(labs) <- levels(x) x <- labelled(as.numeric(x), labs) } else { # "[code] label" case num_l <- suppressWarnings(as.numeric(l$code)) if (!.quiet && !anyNA(num_l) && any(duplicated(num_l))) { cli::cli_warn("All codes seem numeric but some duplicates found.") } if (!anyNA(num_l) && !any(duplicated(num_l))) { l$code <- as.numeric(l$code) } r <- l$levels names(r) <- l$code levels(x) <- l$code x <- as.character(x) if (is.numeric(l$code)) { x <- as.numeric(x) } names(l$code) <- l$label x <- labelled(x, l$code) } } else { # labels is not NULL r <- rep_len(NA, length(x)) mode(r) <- mode(labels) for (i in seq_along(labels)) { r[x == names(labels)[i]] <- labels[i] } x <- labelled(r, labels) } var_label(x) <- vl x } labelled/R/tagged_na.R0000644000176200001440000001444014737244525014273 0ustar liggesusers#' @importFrom haven tagged_na #' @export haven::tagged_na #' @importFrom haven na_tag #' @export haven::na_tag #' @importFrom haven is_tagged_na #' @export haven::is_tagged_na #' @importFrom haven format_tagged_na #' @export haven::format_tagged_na #' @importFrom haven print_tagged_na #' @export haven::print_tagged_na #' Unique elements, duplicated, ordering and sorting with tagged NAs #' #' These adaptations of [base::unique()], [base::duplicated()], #' [base::order()] and [base::sort()] treats tagged NAs as distinct #' values. #' #' @param x a vector #' @param fromLast logical indicating if duplication should be #' considered from the last #' @export #' @examples #' x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) #' x %>% print_tagged_na() #' #' unique(x) %>% print_tagged_na() #' unique_tagged_na(x) %>% print_tagged_na() #' #' duplicated(x) #' duplicated_tagged_na(x) #' #' order(x) #' order_tagged_na(x) #' #' sort(x, na.last = TRUE) %>% print_tagged_na() #' sort_tagged_na(x) %>% print_tagged_na() unique_tagged_na <- function(x, fromLast = FALSE) { x[!duplicated_tagged_na(x, fromLast = fromLast)] } #' @export #' @rdname unique_tagged_na duplicated_tagged_na <- function(x, fromLast = FALSE) { if (!is.double(x)) { return(duplicated(x, fromLast = fromLast)) } res <- duplicated(x, fromLast = fromLast, incomparables = NA) if (anyNA(x)) { res[is.na(x)] <- duplicated( format_tagged_na(x[is.na(x)]), fromLast = fromLast ) } res } #' @rdname unique_tagged_na #' @param na.last if `TRUE`, missing values in the data are put last; #' if `FALSE`, they are put first #' @param decreasing should the sort order be increasing or decreasing? #' @param method the method to be used, see [base::order()] #' @param na_decreasing should the sort order for tagged NAs value be #' @param untagged_na_last should untagged `NA`s be sorted after tagged `NA`s? #' increasing or decreasing? #' @export order_tagged_na <- function(x, na.last = TRUE, decreasing = FALSE, method = c("auto", "shell", "radix"), na_decreasing = decreasing, untagged_na_last = TRUE) { if (!is.double(x)) { return( order(x, na.last = na.last, decreasing = decreasing, method = method) ) } res <- order( x, na.last = TRUE, decreasing = decreasing, method = method ) if (anyNA(x)) { n_na <- sum(is.na(x)) if (n_na < length(x)) { res <- res[1:(length(x) - n_na)] } else { res <- NULL } t_na <- format_tagged_na(x) t_na[!is.na(x)] <- NA if (xor(untagged_na_last, na_decreasing)) { t_na[is.na(x) & !is_tagged_na(x)] <- "ZZZ" } na_order <- order( t_na, na.last = TRUE, decreasing = na_decreasing, method = method ) na_order <- na_order[1:n_na] if (na.last) { res <- c(res, na_order) } else { res <- c(na_order, res) } } res } #' @rdname unique_tagged_na #' @export sort_tagged_na <- function(x, decreasing = FALSE, na.last = TRUE, na_decreasing = decreasing, untagged_na_last = TRUE) { x[order_tagged_na( x, decreasing = decreasing, na.last = na.last, na_decreasing = na_decreasing, untagged_na_last = untagged_na_last )] } #' Convert tagged NAs into user NAs #' #' [tagged_na_to_user_na()] is the opposite of [user_na_to_tagged_na()] and #' convert tagged `NA` into user defined missing values (see [labelled_spss()]). #' #' [tagged_na_to_regular_na()] converts tagged NAs into regular NAs. #' #' @param x a vector or a data frame #' @param user_na_start minimum value of the new user na, if `NULL`, #' computed automatically (maximum of observed values + 1) #' @export #' @examples #' x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) #' x #' print_tagged_na(x) #' tagged_na_to_user_na(x) #' tagged_na_to_user_na(x, user_na_start = 10) #' #' y <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d")) #' val_labels(y) <- c( #' no = 0, yes = 1, #' "don't know" = tagged_na("d"), #' refusal = tagged_na("r") #' ) #' y #' tagged_na_to_user_na(y, user_na_start = 8) #' tagged_na_to_regular_na(y) #' tagged_na_to_regular_na(y) %>% is_tagged_na() tagged_na_to_user_na <- function(x, user_na_start = NULL) { UseMethod("tagged_na_to_user_na") } #' @export tagged_na_to_user_na.default <- function(x, user_na_start = NULL) { # do nothing x } #' @export tagged_na_to_user_na.double <- function(x, user_na_start = NULL) { if (is.null(user_na_start)) { user_na_start <- trunc(max(x, na.rm = TRUE)) + 1 } tn <- x[is_tagged_na(x)] %>% unique_tagged_na() %>% sort_tagged_na() if (length(tn) == 0) { return(x) } labels <- val_labels(x) for (i in seq_along(tn)) { new_val <- user_na_start + i - 1 if (any(x == new_val, na.rm = TRUE)) cli::cli_abort(c( "Value {new_val} is already used in {.arg x}.", i = "Please change {.arg user_na_start}." )) x[is_tagged_na(x, na_tag(tn[i]))] <- new_val if (any(is_tagged_na(labels, na_tag(tn[i])), na.rm = TRUE)) { labels[is_tagged_na(labels, na_tag(tn[i]))] <- new_val } else { names(new_val) <- format_tagged_na(tn[i]) labels <- c(labels, new_val) } } if (length(labels) > 0) { val_labels(x) <- labels } na_range(x) <- c(user_na_start, user_na_start + length(tn) - 1) x } #' @export tagged_na_to_user_na.data.frame <- function(x, user_na_start = NULL) { x[] <- lapply(x, tagged_na_to_user_na, user_na_start = user_na_start) x } #' @rdname tagged_na_to_user_na #' @export tagged_na_to_regular_na <- function(x) { UseMethod("tagged_na_to_regular_na") } #' @export tagged_na_to_regular_na.default <- function(x) { # do nothing x } #' @export tagged_na_to_regular_na.double <- function(x) { x[is_tagged_na(x)] <- NA # removing value labels attached to tagged NAs, if any vl <- val_labels(x) if (any(is_tagged_na(vl))) { val_labels(x) <- vl[!is_tagged_na(vl)] } x } #' @export tagged_na_to_regular_na.data.frame <- function(x) { x[] <- lapply(x, tagged_na_to_regular_na) x } labelled/R/is_prefixed.R0000644000176200001440000000121414737244525014656 0ustar liggesusers#' Check if a factor is prefixed #' @param x a factor #' @export is_prefixed <- function(x) { if (!is.factor(x)) cli::cli_abort(paste( "{.fn is_prefixed} should be used only with a factor", "({.arg x} is {class(x)})." )) l <- .get_prefixes.factor(x) !anyNA(l$code) && !anyNA(l$code) && !any(duplicated(l$code)) } # return a tibble with levels, code and label # if the factor is prefixed, otherwise NA .get_prefixes.factor <- function(x) { dplyr::tibble(levels = levels(x)) %>% tidyr::extract( "levels", c("code", "label"), "^\\[(.+)\\]\\s(.+)$", remove = FALSE ) } labelled/R/to_na.R0000644000176200001440000000261214466735327013464 0ustar liggesusers#' Recode values with no label to NA #' #' For labelled variables, values with no label will be recoded to `NA`. #' #' @param x Object to recode. #' @examples #' v <- labelled(c(1, 2, 9, 1, 9), c(yes = 1, no = 2)) #' nolabel_to_na(v) #' @export nolabel_to_na <- function(x) { UseMethod("nolabel_to_na") } #' @export nolabel_to_na.default <- function(x) { x } #' @export nolabel_to_na.haven_labelled <- function(x) { allval <- unique(x) allval <- allval[!is.na(allval)] nolabel <- allval[!allval %in% val_labels(x)] if (length(nolabel) > 0) { x[x %in% nolabel] <- NA } x } #' @export nolabel_to_na.data.frame <- function(x) { x[] <- lapply(x, nolabel_to_na) x } #' Recode value labels to NA #' #' For labelled variables, values with a label will be recoded to `NA`. #' #' @param x Object to recode. #' @seealso [haven::zap_labels()] #' @examples #' v <- labelled(c(1, 2, 9, 1, 9), c(dk = 9)) #' val_labels_to_na(v) #' @export val_labels_to_na <- function(x) { UseMethod("val_labels_to_na") } #' @export val_labels_to_na.default <- function(x) { x } #' @export val_labels_to_na.haven_labelled <- function(x) { val <- val_labels(x) if (length(val) > 0) { x[x %in% val] <- NA } val_labels(x) <- NULL x } #' @export val_labels_to_na.data.frame <- function(x) { x[] <- lapply(x, val_labels_to_na) x } labelled/R/copy_labels.R0000644000176200001440000000557314736751266014671 0ustar liggesusers#' Copy variable and value labels and SPSS-style missing value #' #' This function copies variable and value labels (including missing values) #' from one vector to another or from one data frame to another data frame. #' For data frame, labels are copied according to variable names, and only #' if variables are the same type in both data frames. #' #' Some base \R functions like [base::subset()] drop variable and #' value labels attached to a variable. `copy_labels` could be used #' to restore these attributes. #' #' `copy_labels_from` is intended to be used with \pkg{dplyr} syntax, #' see examples. #' #' @param from A vector or a data.frame (or tibble) to copy labels from. #' @param to A vector or data.frame (or tibble) to copy labels to. #' @param .strict When `from` is a labelled vector, `to` have to be of the same #' type (numeric or character) in order to copy value labels and SPSS-style #' missing values. If this is not the case and `.strict = TRUE`, an error #' will be produced. If `.strict = FALSE`, only variable label will be #' copied. #' @export #' @examples #' library(dplyr) #' df <- tibble( #' id = 1:3, #' happy = factor(c("yes", "no", "yes")), #' gender = labelled(c(1, 1, 2), c(female = 1, male = 2)) #' ) %>% #' set_variable_labels( #' id = "Individual ID", #' happy = "Are you happy?", #' gender = "Gender of respondent" #' ) #' var_label(df) #' fdf <- df %>% filter(id < 3) #' var_label(fdf) # some variable labels have been lost #' fdf <- fdf %>% copy_labels_from(df) #' var_label(fdf) #' #' # Alternative syntax #' fdf <- subset(df, id < 3) #' fdf <- copy_labels(from = df, to = fdf) copy_labels <- function(from, to, .strict = TRUE) { UseMethod("copy_labels") } #' @export copy_labels.default <- function(from, to, .strict = TRUE) { if (!is.atomic(from)) cli::cli_abort("{.arg from} must be a vector or a data frame.") if (!is.atomic(to)) cli::cli_abort("{.arg to} must be a vector.") var_label(to) <- var_label(from) to } #' @export copy_labels.haven_labelled <- function(from, to, .strict = TRUE) { if (mode(from) != mode(to) && .strict) cli::cli_abort( paste( "{.arg from} ({class(from)}) and {.arg to} ({class(to)})", "must be of same type." ) ) var_label(to) <- var_label(from) if (mode(from) == mode(to)) { val_labels(to) <- val_labels(from) na_range(to) <- na_range(from) na_values(to) <- na_values(from) } to } #' @export copy_labels.data.frame <- function(from, to, .strict = TRUE) { check_data_frame(to) for (var in names(to)) { if (var %in% names(from)) { to[[var]] <- copy_labels(from[[var]], to[[var]], .strict = .strict) } } to } #' @rdname copy_labels #' @export copy_labels_from <- function(to, from, .strict = TRUE) { copy_labels(from, to, .strict = .strict) } labelled/vignettes/0000755000176200001440000000000014737431515014040 5ustar liggesuserslabelled/vignettes/approaches.png0000644000176200001440000016372414357761455016717 0ustar liggesusersPNG  IHDRhtEXtmxfile%3Cmxfile%20host%3D%22Electron%22%20modified%3D%222020-04-28T11%3A17%3A05.579Z%22%20agent%3D%22Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20draw.io%2F12.6.5%20Chrome%2F80.0.3987.86%20Electron%2F8.0.0%20Safari%2F537.36%22%20etag%3D%22liswx8aIsWLA3o-mdAqL%22%20version%3D%2212.6.5%22%20type%3D%22device%22%3E%3Cdiagram%20id%3D%22ExFkcT_Qc6aa_9KxlopS%22%20name%3D%22Page-1%22%3E7VlNc5swEP01PqZjIP46%2BiNOM01m0ubQppeODAsoFVpGyDbk11dgYcDErps4CY57SbRPK4He29VKuGWNg%2FhSkNC%2FQQdYy2w7ccuatEzTaPcH6l%2BKJCtk0NWAJ6ijnQrgjj5CPlKjc%2BpAVHGUiEzSsArayDnYsoIRIXBZdXORVZ8aEg9qwJ1NWB39Th3pr9C%2B2Svwz0A9P3%2Byka8vILmzXknkEweXJci6aFljgShXrSAeA0vJy3lZjZtu6V2%2FmAAu9xlw05ldX329nRpB%2BCWaX0WTQV%2BedfS7ySRfMDhq%2FdpEIX30kBN2UaAjgXPuQDprW1mFzzViqEBDgQ8gZaLFJHOJCvJlwHQvxFT%2BKLXv06k%2BdbQ1ifXMmZHkBpci%2BVE2SqNSsxiWWfm4OkuauAjnwoYd1OTRRoQHcoefufJLeSs9QGtwCRiAeh%2FlIIARSRfVuCI6PL21X6GgamgR%2F0FQPe%2BCsLl%2B0oRIopCrIFRS1eSuirn0qYS7kGTELFVGV4VzKWNjZCiysZZDoO%2FaCo%2BkwN9Q6unafZi5u%2BhfgJAQ7yRM957r%2FEmq5rLIRiNPMb%2BUibnfwRnuHkvKPD%2F0zT1D32pU6Ju10B8jV3EWUeRZ2WilcdJlakWjmVAtL225xJYooqxvqv7yeQCC2gdNFDCcDvSeSpRBt2eR7mESxWw3LFN6Hz9TrD0z5bxRmWJtKxI2A8Ip9%2Bppoh0E2OhkDkdXR6xuw9LjfJsKRMV7EtHoCEnumA0j2fh%2Fwt3KzWDPzctoVp0ffPQj7jpD3i9rjqZ0v8O9cN%2BLodGsom%2FUr4andT7ePAA0IM2O5i75Dmm279E6j%2Bu3T7PHe2%2FxazxdkN5DspjNgntkg%2FwDTBMk7b2mpht0H05kanozJ7bFz6V70x59c70lnp29VNJs6FAIkpQcQqRcRqWZb1OgtGFs1uXOxhfWv%2FjnF%2FIinFZvUATXeikv2ENO8jK3%2Ba2jAXv5KVznGkBzv0bzMAwFEttX6LDGsVqorBJZJYwjhw12NUQY9bgyGbjpDClp1CZsqOGAOk62eT%2BlW1VZF7nMC8D2zfLZ14V9NTFfTZP6jaykyegUNTHabyjK9or51HYE8ZHekmubUf%2F1NiNlFr%2FMrkp18fu2dfEH%3C%2Fdiagram%3E%3C%2Fmxfile%3E^ IDATx^tT? 4!D{!BBB!J$wM:(M RAzU@;컻n.?~swW!ppo@r@@@@@@@@Yx@@@@@@@@4AB& w@@@@@@@@@ dib0HYx@@@@@@@@4AB& w@@@@@@@@@ dib0HYx@@@@@@@@4AB& w@@@@@@@@@,Ο?Oݺu~A7)///YjbZݻwGyf1KOԶm[~/ 8 RϞ=_MNNN4t3!d?4f5jv̙3)w'&       }f'O+͙3'lg̘A > w޴m6=Z4hd0$Juqԩ8qj֬)bbb}ɓ)gΜ+[bGQ֭@,VZ!!!)SW6G4        i!`VbaYqz%b77|CժUKѧRiҤ ͚5GJѢE}NŊӫgꫯ߅Ս~7b>}k"Cm,Ο?~m . 0Xŋ"cx"URիGnnn)%SZnEgdOa}*f"" t\!X8b߸qg_DDٍXd-[P5,ze5RiU<-.VZ7dkQx@@@@@@@^B)s18 lb7Ef(Uop1XLYx.o86uPXZ,H999-8n իjr͛b ,qBֳg_̍+&eĈBkٲ%q,!?LL g. 㜱06l0˥R2.>WIb[Oa,Ҫɘ`bLߵkv΅co{67N;,*RȲ?v  `ֶL%xU YxOtn\)b,%Y3 *-,R \rbᅨܞ ϫAԴiS*_VjY Ydq_ˌf-TBr)S,.ӎ=*lIB~ΐ nN2wPTS1,,xP5.vl藺(Jm *$S FZT8NgVR`զMȡCUV^ѢE]F<~V"gx]VJu:4(U+:pmV\+XZb[|       "BZr%uAo*HsevCGXQC͛7,dKiӦeS Y<ʕ+ܹsS̙c=%$$PuY"d,.˟R|Rv'Ov׮]7=}BCC)***Xx|;wN|$+YbkB3VXKLLx YJˤnhhf?(q Ν;nv6mu> =ףG!^qaWw}bccBbbk͐E,'ݻ-#s<)eT:/^ ~;v"[*U̳d8.Ìx<\~Y[?\7[ , ʛ7-6@@@@@@@@4JBFFBVF[q@@@@@@@4JBFFBVF[q@@@@@@@4J Ya8(Y0>Yx#@@@@@@@@4AB& w@@@@@@@@@pM0I .XA k+j #͑Mءb  gݛ:nDB#VA:pZ H dYY @gj`j# Yk9Ҹ0 /-AXFBVk@F`oY-g?{O}6ؓ ds7$:Cm'Y"j= >?uPIB=odԁj= @RG9uPI?u `OI} $,uPIB:ȩ `O8O?{eOlDu !d `OEN?{O}6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'YAFpQB:~ $!K}\Cm'j= @Ȳ'} 6":Cm'Y"j= >?uPIB=odԁj= @RG9uPI?u `OI} $,uPIB:ȩ `O8O?{eOlDu !d `OEN?{O}6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'YAFpQB:~ $!K}\Cm'j= @Ȳ'} 6":Zz ۴'`+S>VZ7kC+V7|S VCG^7+W/ӷ;R){79 Yhk"ӁԵum .J~F[[9rSڼn%9,M+VYjZb M~4lx^KK/W+wү;O j}S7{mA@AF݋U!4s [<.KoY? fmDl3Hc-lKY/Ҥ詔;Wni BvmB:ZY*d1~qPܹtďJUu|#[ƒ"S6^r|{G&SyX4 FM4]ܤղik:|'A~ʒ93! G"@O_O'b]}iWeJyG,)]^|kEV8_y-ݻwW=$s&7CfaIy?Dcp(Кؿ6,uk\Z,diތ *,|f&؈ϕ}No.=~;9;kc(nL 8~L3}ڞ%+xpn vUxIb-x>,{woST/^@)Syܣzh Q_D0[8'QW-ٸ-KC/8J'Lx Ar◣q,DoN,)WeTߐ kRW yzu=vmDUׄe˗S dE@ @RFZ>}J149-,kʉS3q@jۢQpȏ4:.]HQ1Ժy[хNvTJacDq84v|{0[~=!Pqzݴm @A~uQr(KukĎϟYI!EN::ϟ?73Iį(jӢ.oO҈`oz*MOM mLzG 69 B[V$ *<]iH!1}O)t`jkjʵY8RYVCYLjiBxOP$eɒ\uտx*TRF젉ɋyκLZ۞zy-!vlӱmxa HA3Pe Y,;f%=:šSbi8r ʔ0ؚ*p`jߥ}3 gq ^Iz\mש)p a8w5hJ1'd3n>4[fʹ?{LNZ@7{>r3QBlV.꾁j?uGm6Y^?dԽZ=iÖu4cjTI'Ba3 Lofk-XLZ3zQ YɓfRӆ86s9+.\K0 ۷D$tG[qm[sohN.Wt]rp覎F%T9ePu1!EAXʛ}߸y<]3z4lc)+ZGӔ496Z5kcv=M Y֎l,u \Z,)H9iEH[KT_~ĖAXp=S+)7ytLD[7#O4WqiA{y 1~&EО][)<:*VW˗4q\qF['$-9ՓACg愬FR: ._wF@IԩC.ñ?yD._3~wl7&djGYZ,e1+s--WZ4V!FC󵵾i~0PAB x BBВeN4 sL Yܞ%̙XYR@dEYT8 2c샃֏Ŭd=e ~G֎ ,u\ZilpiF2ey w \xTȲW%dq``o̒' q9j\yD.u-tT!K]ZhېW @,u%!g*-x,ZT1YԹ]Ws6x!K-L_Lj2U/5!Kf'Y^Շz ?yǨTl8[V؜E/ҥi"<e-k,i.~"~# Yg!K i"!c>%sjF}V>+\%*Rԉ )&\|XȲW%dqr "a֭Z,gΜj֪OtيTԩ/P%DӤPM Y2^8cOQMBA d|D]n.[ʒNlNe>.s4#kr$Mg2F1"CXWZ}[;>K:3UȲ\Z,N_G?YvBSe)L,O~,Դg{Panu1F1Ժ}w;SO›Dc+#SXW\$=k4.a6}Emľ7ZdtkuIWo7CCC,G͌ Ce#hI Y.\}Rhh(խ[%КřfN{uCÃt.qΜ;M>t)@7d-^. B̪EϞ"[BL")nϞ=ȸ0*,vÏ)h?UXU=O<Ⱥt"EC3eѣ)NKMcǏҼE@"K\VEYqUȲQ,"C|h™p08ct3Xppe;K\ մg+%E(cY 9nդ0J4&\&. ~Aڷ=(Yb&T"O?*4mف̉7_=;F2Pf[1 V@CHg2eI>R]xO,i~kj۹^ aK[ {ZvEYn,mۑ{]|AFxXȚ;wBBB&dܽM~MԠN#MTec?"0:g  jA]E;!ԶE{ʚ5sȏ4:CZ!3c?zn ֎' YһǓt4cJ,mߺ^ƱSx)I ]m$е/e/iVD95N"vm:҅3qCٲ+K_˔Hd\MA*( 6n*U޸ ;og2+ A3nnD\Uo_fkIE|-.2yV%'WrGwtÚFǝ+w^1OBj¤%=?q.ጪ)!Kw<0b<ժ&=]OvA9eZ2ʗeNv"JE_]*).q}S)m۰^F^'_[f(SLU!˖WGuҁCŅu KN%MX8╋軝D]kB];tO>.WO:a֏>.Y&|75嫗hޢmBDcህ/v/tg܀9!v VԲiЦgJv iÖutA1j|EZwz.emc wM8 Z֌VB-?_ Y|qc>&exPM[u=}J.҄T/l=+B̄VK,iwKz|Z|Nb QBw6Z(9eڊgӡ{e =ZwN Sھe=-$tI KΑTr9a6Hn[R2u2>"E0yLY$f|9ծׄwP d{4?^"s̜߄(GEƼf3Edf/!˚5KOu dunÞ9=&_}^!kMMg]U/ YV JZZ^;~@Q dt!K.rnb\I@^Vd9RO*?$g'M (]Ϳe/Yj"B2!d1Re~CaT/2A>ׯ^B;mV\~w=bG+/;Y=i(HfL-W;S>Tq b,lwYf1e-+bL2)#ElB㘈fNݍMi )!ݿ-E._>B^V}нnZўŋ%/paAgO %heiY 7=Nc)w*R0O $p\.w(G _,sBS)klfBd& )w(e͖ *aT-Q VZla\5Dz_B%_Y;z, @tֹ6/k֝(Ɲ/Aߤ%-_8$_Gz<8fmB BH,u_zȽ =i@vL^uɕngU(&a}dr8aKɜ46W Bp5­.^ |>tI|o=~JdzZPinϒ/aZ'ۓBְA؂|gker ։;;&rû1!NGŊ#7Mh1%.zr'ح_2\RyB҅!ˌu5S<eEway4:&u{تjќ$ S̔@Ss-d4p`jߥdp8@3ݸ~f/$'sŜY 򕪉/gq-ɑmˌL[gz 0_@ev,ƏEKKu߱U|Y eQfX<`\Vmg lƾl):]7ݟ /qsVY-YE `;l.% AXY3S&Ac֍?i֞ۨ_Դe3~{Z}~,HI"&čYQqmԷ@oake!y8K"#ߓ:k…E07ڿg;\ YR/̲3ge[={={x\c'ΠϿ+~zq -84g$ ,uo.rZ~6,9Vj -6V BK`e[ܻ v U|dVX^9ҾeK%-ԪX|.6r$XM1tEE:%,3fm{Vըy[]2 ه~-E)6`5NH[K1Q^S1$cJZ)W7Gy'.K7):~a7La!ly4?K WPeᱳ!NAԢmi,+,sk(_۷nX$dk?7Ű sB?d[ci#>/n9ufʱ(gSνD\,A])wzz=+in)7_d=Y,.>SN!: `__6YsDMY֓O/-,s 5 RkM'ﵦ;Iee5K˹tef3gg<ڽc q9,{M*B"{)F1gsZZe[g}!Bzi!~֖Xd4#v9pÓ->VlE  oL3#̇չtK !( 7-| %E6]4\ m3)b{BO?gN vMJ+.&oiBK۳ai{ A̍-=zHwnݠsא/Ҋ)!C׊sHZ^ҽ3 4ԉUYN."4Ua ߸%)_I/)| Pqx*bO-m:9Yf,itk/,Z:V-'+KE? YE bތY?,Oׯ]X1~Bg-VZi$@r !v>$ !,u,={_Z,*mzF2t"|91q8~ؗdM{}i!ؕrxgOJ<668֗/H:t/j]Xԃ\}c~:OSECEFVZ+̘!ˌ%UOaUNNgYb*Q*VX_>dk!V-3gq e+RQ@!-|c"B1Ad.%-NzL' BȂg!+dyp<[cArw#K @!Kfbz )  fXRS wy F=k3[۞5B=E$"+!'~"EYֶg{em{iq8{cv]SĨZt>XRl":iJA~ž[k/Iڜl,dqvOV0b8sY&,bg͒f.򕪉_ S ǴrYƲGt8էZd]x^XN}U;.7۷nR%Tt4e-4ercXX$N Yj?YiP}iܹzˑ_z,xM'f:Rxl"Q2lRӞ*dc1=,Kbd]@ً=}J;ۘBtOGeW&1)2G1m(d#-fʜB*{e d7_r|ee/]LZ63u}GǏ'*Ⱥy^JLRݽs%S̈́b[&M Bϗ!#gcG3E c(ž| R@X ծTdT`řMF6bk!_{^G ck\Z0$dݮm4k ڳ76 L݅.Ϟ=[Qb%BWg{џq%h'?1IS)w.O+# YzS^nL}N5k7V4eJ{goV~-1^VF|Y2"C3ƷϘ8! 5qe343i`8֒~iߥq|f.\w41*؃ȔLޣtĉ4q\=cc5Kuhw)"6:t+D!."8+y"͜'!4k]8{?./d͚M7e.!Ԫ}7];;&"I@7PGar\0ZϞ- &,+,s A\FRAz g؇_$Kޱ[??hǶ &|Ad7qsdJ|x9 +\7!ן!(9ax~ ۷/͉DŽع@ m,~_K_&r9_|I 44*;xZ%ʘ2AFZ#XsYޟ(TRvA 4Y8ԉߎ PPv*X6n[OOB -Q}qY=HYRUխ/MKm/kԢQucr}s+-^P]*'Aޟ2 ly^6~=RBEb)r}{te++i-49bŝJPvݨW>"^a{.-YmZz=ʗh2Fgϟm}5ݐii' YiaڳZY*dܕY $ʒ% Sft^ ="W25VػSZf3FҢ`铔?N<ʗAn: }a0s غAg Ka.oٰJ/k+.[!i}1``k@A܍آʝW2&=&dybliҮsoV݃8őCfzwU:Z_7>KIH4sz]=y>^3p n)dmnzݽWʔRi(ʙ3'MDQLjIY)sbJ({[wnQ b"&Phlh!3eJL[ Qv xag̛F[LS(S'WXqʙ#'-]Xpϝ+n {9tG/Gޜ'<73ztmz)Q!khTD=x@W_k}= ,uzKń>HC%>IeqOYRa߶i-͛/bpX2L)Z\dfsxqc8.{uL7RjӱqӋϩg_`,qX!>B01]dtGѿaBcuzB -l*/_Лo)ˁ۷@ÃtG,!+ѣ/[_la%KYVhU*V1T\%qSc)S#d1Z0?ʛ}HQ S'M`Prv FZuԭ c&CGQTf Yǎ%gρTV}]v5WQhb~S !K<"E.B֭D\sgOQ5TbU!: Oc]޺lU8IO݆)f\Hk![ ,~8+ {& *A>td@>_b\9KqArY[Wf.¥BG,p"frP%TtYɎCE6-(4._HN4i%"NaDanB3SmzqS\Y `q E*eY8Ȩ[G?s4Zn;{QӆSLtB vpai 05>̯otBukգ\cLĉ<y0O;(fztY,pK^OvlӡM'9BOkfWۢbJ!+y\e{NŊ ~<'.j,|bHtp<8Ǐ88=sbKPzMD=q!6rY]Fqh]SYd~rXBX_bŖAq57>cakjޕ#ǓV!Kffc"Yl%tqV,X+ᬙ:q !KO^^mKڅs;,Xbe#v9q%"R ﮈŮiq< 9p4LEX+-ؽG8a2X d`JJ9^vںNRg+;a}rm}^m1n֨ E',6M>yW5kAeaцó˘,c%"gTd˱kc?-`hH*T² 5oҒ8%U2aԜ!dY6^*dq<$=;zEj|Zt, ?x8:bdy38,C9r.ٵИ%KbB*,_iiָE;[]L I*3$w=wE03,YIpX-# Yn,`URWʖ.'.X6m KRZd-jլcHZ,q y۵a:n61ׯmQ{wρFcqz+*dIF]6m9_s}l[Yxj"V!KLYMOeAj֪Oe+V%?b%J }RX/ax?[j?9su*]"u*A i 4):&B/~!P UѧFr4U'YlOk|Zz;UQ+EyiXdB4][c `#l̀= @RGQ,j46dn.˝dxS1Eop7'Xd44Zx=w/1|Gy?K~^ς(2,_''O Q)O<Dk,ޤqStA0Lk}0DߖBtc5#߳ePu+-Z?!KP^"dߤ0J4VPr%ڧ5lښc}Biu5٩Ğ7bm"djR8 pטg6SN=i{?tY7k9.FCSsgNÂ6w-dΉ#E`}S.ְSM{5qSUY6:b2^G³3&&<~#R,xyƲLuޣ(k֬R>}J &{(?*Wx &I YRD+TRdTmD"{U Y|9㬅ѡ:؃\}Rd-d<T=7k!gLS sƲwo]4=avuTL[mYieV/r Yak,_5KVJW&0Hw8S.k21a&>k6={*.cDlu1 ŏèRwXa&E&cY ˨Ag-TZ;|mO}d(9ȬiZhk!ZB\+"kC뵵i; !ؙ,u Bvb1S.:4}n2M(&8z4}Nu؃|=Phd<}R}i "k|dעe)zRm.E6FFBRR9sB 2o~= ~C5)f–Cu}GǏ!n޸FS񒟈z 4&.Yj6Β7!*X-s;,qv.ԬuG<{$EAG݄u֛8X8jӱ6_Ia#Ĝ#)sMwq> p 1E.b,>,-5 g;]m07bW%dq@Zt>@oz+GN]cC|zARhV?sA dق";nU ( l=y; ӣ'C-+Qӆ(7e&`<&oFd,S|<.>@= cN>۵CwcF BIt/"K 3Xז% [-_TJjNηIY`a FM4K5{pm[bH,~ ,hoRTʝ+DĹfys@.̴ZjTLJ9 Y?'P!P1+b '-1u̒"DܲreʋW_m֮Yƒ"E,EN)d#H3]00;mZ+D2+/;#DџA;m E\}slB##S)4b#FVLy~@GDעq!憮D]?WV5K)MuՏ {xh!–ZAB̑u8Y 7Cuj*\ gvÆR)sf|9=gV}/U殅r<AB+T(-^R9 g|ovA-V?uFmH du,28dԽ*d~G̠/տ[Y,` OJ!pfm%+ӞUzԫ[_VSa]# ?]KW-հAzzW^+U,`VP];tO>.'8NO95Wӡ#P"Y oZb{Kpʞ=/ȒۼVhܩmҾ;qy*-$"uEvڱ5HXbɢfªzڸmx7Xjת#ulY'Y <!KT^ Y,TRۈD 5*xl,ZxuXYu_j_`C}xڸp4M;vm,2ebO޾e=-$xrצcOcM L>)J*Z0;vl י>~/vXZ|NjҲ=u= J[r-[4vl[/ fUI+Խ?.[(_ì,u;5oK|PΧ\EB%G5eaE:5[6YMi<؈,D3 `Ow!K P2.Y9uP[ [gUj= @Ȳ'} 6":Cm'Y"j;&,3'SV;@>A%XsFGj= @Ȳ'} 6":Cm'Y"j;&_DCzPDl2}\p}d7'S~E9 FRy:~ $!˞7؈2@BRA d:~82>wuMw٘V :~ $!˞7؈2@BRA d:~8Yʏ?|O+Ce0v74qgrd8[-6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'YAFpQB:~ $!K}\Cm'j= @Ȳ'} 6":Cm'Y"j= >?uPIB=odԁj= @RG9uPI?u `OI} $,uPIB:ȩ `O8O?{eOlDu !d `OEN?{O}6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'B=AH𪫮 Z[ Yt6 Cch IDATyIȋ؎ֱg7G"8Ҁ0A&̸,븡8Y֭.rqC-p$8YZ H d9Ҋ`,        F @‹        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,      c,:8hRB "$b"H$+^Jqw)k̾fnv7sy%c !! 5         d94        @@@@@@@@,&t@@@@@@@@BCӄN@pbIYX        ABCL:         ! k@@@@@@@@!@riB'A@@@@@@@ da 8Y1M$IB֥K{ta #FPDDeΜ@ԳgOڶm~ZuKuڕN}O?U.*L,YBzj/o%Tti9s&թS       6%`Tz9tt4p@ 6n/BmӦ ͚5͛Vqc\       @uiܹ39sj֬)ۢGӦMٳ;P!dUT/_Nlu|X\t)yyyӧOů,ZB-9FDrvvX{Xʟ??}TZ$`BLӦMiܹtA)B:t www*ZVSZBCCEsVa+7hƍ@QvѢE_|6lH!!!TbEׯ_S\ݻʕ+TJ_>%/hƍiW{r%Xb-Z,Y$a… x?u֥N:rs֔'d*UJ͛BTdA81 Y\i!?/@@@@@@@lE Y!ɓ'4h ZjE)___?;C5Svm!2`R Yl#ѶÖbqqq/_>ͯBV.]Dz)S \ʕ+G"jtfajҤIBɘ1,Fg̘ap8>׭,a=,JX't,)mڴI|@ Bӝ}#dq۷o'@@@@@@@lE Y!ȑ#Ԯ];up+dw3Zf uQQFʈYaiah (,b [Yu?#(vL2[nXPc4X=z[_1%?׵tblСvdc 3>CJ z/P1\¢Mf-4d!jl Qhx@@@@@@@Dɓe?luŖF3gN=z=,а[]rB `,4oޜ^zEW2pղeKї%J۷TtthNieG]۷o/D?[C }%/ȏR믿v'V1;wr?JO x2eQNWraaaB@d7B.}c0bJ)d- *]["q:~޼yCcǎ &+E.9]!-MQ elĂ{'d,-[kMI)dq,-4uU7Kx auk.QN9vCu"?Rz%c5jZjEˏ-L g-YE@w<        `K,z-~nJ&}R1~޽"9?{8R@lGi8vcaԩS)[l,H%cM%Sƍ@W 'w8V?LG} Çw۞t,`f5L%/--H Y,HĢOrBXLJʐRE)X5U2T?6N8QĐb#,SӭGfnz왰Fck,~ M3C .        %W[{2E.ӳg.^W -ޙb8`Μ950"K)pºSbܞk֬/_^=XkL2{Csfeu]?B) vAM4IQ[Bnl(]Eďʚ5k!׻n:/4э18?,daݘU)u!Ef&dB I;E03ft9 n<:PՅhL2{B^#GUAJ/X@!KWqwwHʒ%Ku-"MrY2e$ug@',qUqB+,-ս{g-TBrӧ ,~tcOrq|cB߫^~-8RaQxQ%$K u%!HC76XZbVJyI"$׋r )#ckQe"@PZIjE*W۶m+ǎ֭[ xEBĿy&q?](]9  A󹎒%KeɭԃwA@@@@@@@\I5kPǎE}+Sf7dk14dڶmKQti9s&թSGSR>T\,Xd"dqE,.˟R|R6+W.nݺѻ+~ $}tE񻄄!^ɇ-(D:} c";cWB]l(       jh YpkKLLull֭\#ճgO!^îj~!EGG " ŮEn6CgSL}[.WWLAB7|ڽ{٪TB`E'~qf2lT6m^[۷ A/_8 !?5jX>S c~ohQ@zFfbNk        Rz}+p]B֒%K(wܖpPtmHo dxA@@@@@@@A @rЉCA@@@@@@@ f%`5!Ay        vJBN         MBVCӄN@pNC0H nD=1fB{"ϼوƼ( `7Ew8RGpsB_A@?YX "g$ c7{"ϼـe7{"!˼,uPlIB:8 `KlIm"$24ؒ?u!d `KO?[eKh,D9u qQA!K?[>4ؒ,[G `!ȩ:~( $O}Y4ؒ,uqSA dْ> ENHdCi%CRA d:~( $!˖6X.r@ J- `SGB:~( $!K}Ci%YABpSuPlI:Ci%YJ- @Ȳ%} ":8Ȩ `KчJ- @RG?uPlIB-m\ԁAF?[>,uPlIB:8 `KlIm"$24ؒ?u!d `KO?[eKh,D9u qQA!K?[>4ؒ,[G `!ȩ:~( $O}Y4ؒ,uqSA dْ> ENHG?tukS P_PAV^)J|m\CN%JSU-Vڊ6^Jo0 T)r-ڊr̝*:z#͠# Y/ݸy6n@}G0ʕ)O_TAmһᆱR:+ԱWr%wv77w{vP=(k֬v׿,un( MBcz.rd YJ:UIcOrtblńв {붗B̄OcG?yL segT@!r*R}~:]tAҡ;CsA6uسu%rv*QPYswi{ ~d@z&!+=>ƞf"n* #?Ѵk&a1u/YF8">)FCRMK+B(pڴ[jX1ye*h,5e=tG^c){Bi`k(UTB"Kݒtѣ486Y==ȩ[~1&dI:ϟ=0Z:?BCc(kT  [?uMz5MMfNʞ}zkQdh uC,9YS!Kݒtѣ486Y==@Ȳp􃌩BןshǔhV\+b8ڴn9۵.]M:ԬuGjվ&ncHw:{ DO_Hm^SRxUZbm߼~quRTlzwDUɹr\]7ʥsؑj_Ԥ=R&-)s,Ie'=r`ݸ~(׶S/jԬV1>U1\:,[(E\.}}Ze RMkE݀zuo`V,ܺ^~%7ă>ΛO_j5,"h:xB:&d; 1X zZL)U6Y+.M[7Pm)S&7oݠWҖ_{bPRe5 P (~#h}h\yԿ *T?RtD"˗/))@>kxb<e/̙3kNεϿo5VGEUSN=u]ldՋ7'y?ygӖEl3nH}MK_ף3(g)ퟺOKCR7~S7z&!˱A9u 2)X (4r4'aE\Kz?ıg,քFSѵ+)f:sg= 若>Sf},cH Z,*xXlc1ː=u+2eW2qۙnߺNo?Rm4f`a`-5l$!_rA.^B0S嫗Zv8ӠĢ1*ڴOs$!ŋL OAB29sWrr&5'ڵ DL;#ׯPDpiN4!3*RȘ"8 ,M&,BlŖ_OxB>cGЅ)~,*UVwLC=Ro(u:}=&P g-\k|8z꥖5LLTк(+z7û4ez Y4Ə-kbYE7nݠYq$ gϞR`a1<]5Y(c2cd6A "=f}~!9wBܜr-4{*!Kl8OQ,Ǟ?\-G?ȤTȒ"1X86YJ.cIIJ72H>,p[L>_ϜJ;aO4w[qqO{p.x k1~ń;(42*VU͛74ebqF[l'',B:iFCYd5nю'XpqeRdM'E'P\.tpфnH Iz 7iq)7 ,M-"\cK f,HK!)d̘A.Eui,LrҊ$czU,_IXX>"NB;\X(Etyj3)dTO0e8Jq@ Xec+dSfĈ%g '>ǎ f|6F'iE)hB3X{{YfF  d9 !k2آ'tmot`wt`"6)YSMxݽmP-+d)-8^2{;Nz)ݼvΟ~ؽC]e9g)+ ,qd ~8H_YckڌZwYC G؅/ _k%y>sd;I >qXR gW/eJaHZu=zPđ8WϫׯRf$WRr%}mvq1e~]5jNWNKWȒV]l]c6O˽R Y*w5B9g!K :OQ,Ǟ? dY` 8A&Bּ)4![y 2k Y=+W q W4}ḼZBG挌,On q2?cLTvWV"N)_43.F;%qyo{>jV:#Tl8[fؘE7+e&@e-s,i.^"~= YgBrѣ486Y==k!ǔ#Gի2)d6;&3=ʷov AwK`x"uo>}P!ncz1IQa~ԦC7SO›Dc+aQ_7_!aέ41n}UX^\ =O>}[]1- 1TR Y27ƕ|.[EV"s+׮% ?SYYҒ=;E~>! us.7b-ime K5ߝʔ*q#kZTh0F1"I_,k}מIB?G?D@ dY0T ^-._L`W٤ cłPx7\2 I#g,nf P_ppe;S\ gh/k!ǭB S'д9+EE}O?Ju%JSԴšJHóO >Zu$clܺ!DCwۍe,kY 6l^R Y.at w?2@;)/Nc<܅s4%"4hF)ZXJuM@5/Uy~qpO5~%nj.!ԭc$Q8?\{u42fHNdSNK5S"KXVEUȲ?A ;!˔8)}I54Q jץV uS˛ ovZaIYL۞K- Y<,d-X@YAAAf Z~1&8͞Mvlq,-VR,eim@ o ߻s&N93bĻyb_S[֍kkxNj]v)D8_m EHa1;O^B~xtauYmEh *]L_(~'ED۰ׯ^{H˻"Nf"KMLgĈX_m7OY,9ń?G԰ncETdcSx8{5^΢1>AԮeʜ9\;#'!GQĤ޻#,.^:/,:B~#(SLZ"gUݏZ6m_ (9,kG Yˢ٬ Bϙ|=E[gϞR`Z~MsܻWpo4k k Yvl M_>qvzpZ申O]˳jAVԴU `]|;. >$ٱd?hÞ널pý7"Sy`&)K-(d&_K͓IB+AiV,SS/jҸ*Yue(7?%vm;ӊ%sIvEYwDT\EcȄ'Jp> r2"VTZ߻CF{ЖoWeWؕް^$Le̳)1n8-s<ׯ`?79sqkW,L"L?GtwȩH1bD?y,,D.ܴ2cw[(tb;g+=xNEc˩}K&~ YJ7V\>L{nYOcdߞ>FO=XR&m̵/'hT9P<'tu)4 \G?9H= i>Ӟ>{8$ncSkdi!ᣇ5t\µ+@iNm?KiҳŘ\S|6jkbC >gxE^JpZxBmf1] YR)7YJNGj,̝*).qyC)ͭ[\zRR3V J)Һx  2qS/ttz x7ŌCB|ŪZ)/Xq- da#%ޅ,L2Alirڳs|7s&vS0>S?)OZs| hUԣ3oRP>wYf̏ҳ5GEL|.bDE\DŽ]8gnlH[w bҹeJ?jn@{_K2^S{xdbK 5'^ y Xb%eiY Z8+μ DZd}kx*\p Kkp\.oFt1!˩X)ʜ% _8}L/' N# Fd iƥ$Z~p,CwV}3O|a1Vvd,4*(L"Dta7]#E.~amѦqOD+@ UKkoP/F X_.ﭧOBZqK=5!JId}R>2@[bc3mjب|O~5qcVIw8ħEK9sLhc =A9FTƫemu?X.IQx<^yw !niB̂eDȺ{ s9J\^|͢S<O,Z:?^5}k!cF] Jh cx;[iBx28RTR5f.}D;ٲOB4Xl K3] 0@ev,,&MK+HqN?!>,BDi2j)3q,r0e~Vmc4lƾl)q1,CVZ8ȤC{M% n h?u,G^{z'!K)=;fԬUG?i&}G}*y,HI"ŌQ?qͭy /ake!y8K"#ߓ8gD!~nthnv .ak 84YYlrK/_-ׄ)˯id˾ɛO+NE';3ϜJD%9Ξ1#(~XoY!0Xf3xP%WXVZxO {wPhd|ŋc#L a4c侳;JD 3MjJS}K'R N=jG&oݠ+DZ{/>o.,xiT !h)B| Ѭ{2@ZڶqIAu /3_張Tr~ Jm H!%F_ξ[O+翔 Y2LM4gᣩ|j".*\I+!6s3b̭O Y[$m(Rb Xo.cIIlL uJc(ػa,VI74cj(IϦFӏ$F>͖WlL;kpU>w6;؝ZjPz! !Wݽme>Uu}&'׎MQ~peˮ1TE;u-bF9srTceaď t dA2峤NݺuB`cYD-YJG `SǟVQ4&3LHNKg3TJlF"&%חPׯ^=E e(ia1cFq?}0OgהGWHd/"5"bNK,che!O_|w1av7֦Xd.})ݼvΟ O7fL2U\!볲)hB>x}na>vƥX:{YDq#9dĵ0ȥrDe @ZdЁ; :ZnK ?kY>F.?'{ 7)Y M31z ݿ{.ĖV8{q^6/;POXU"bLq5jI 2+i%3ğ#ks,7JXsbm:~B!KZ=|p_Ǔ.kW,+'EtS,pv,Y>߄-nݼfbBg-VZi$@r !v>$2cAV dj]POB/-R*dP1:2B,[A d{eƾ sKaN}YMĮ fŊ8RT⾱DZvIB{?VSAMmE=Շ:q4_d>TdhTj lűߑRYF,55 rr:˲PQ% %S] yn\Lf̘IǁKHESif\W S@dB:)! BVrۣ1k}@ dJneU:A u@R7!Kfbz - b fFhSqzx.y N=s31>s,9ϿE$n'" BiB@tт deZÛ `oһ3ԗKY4kZi޺F(ù&oJ,5|/B17M:ƃX2%FhIG/h[/~_*S2y$sӺBOQbsaM@r|IBȩemJ(g޹C~׎Mb9sD>MeN9׮GQ]4ocBR[d|ƙsYrW -x?jj=Qޗ޼y&̓հ0,,si dx5}=TFd6jA"1RY9e^>R=Uڴt#;vmQ?e)Uէ d9ԦrY =|? !(ؼ(0[Ar_w[v/|>MN\JEJ7߇7ft93sR!K̫`O-N;,ݢmgZd5iގB(K*##g欤&-'-iؑBO`Hϛ7;5 :?\ >#"j\!F`)6yQ8߸ڱy-(>_n@=:S&-*23êoѾ]E D{X)2&d{:XELpp0իW&O B>kMr,Xz@瑚oy̅cҙL\Z3փOgBֺkcp A}XrԔRE:7I\M6IҫEVz?D;*Mh'~7?f;Y-KbJd0%Fh0lr\8U㔙ͩ/9TΎwr׬uG3]\z k KЧN& Y r/ߍY0\d.޿KHMsIԆbdݼqUX߱Q p@}Ib9V,G({7.b13"cʑ#IHBMDNPFmU d' cTϐk= YS'Nv:]Ⱥs6Z5jPT@""^~A5H@ Y-un :Kq%^/rօ,'eVL5Y8;cGot-M9? %ݘlez>{nVC)Q[{YJ(^`7ntڽc“M3px Oٖj/9]t I.)}ѵeka/Y+ucatͲbMk.} ~˦>q+<ݻO;lXK}άU?udm)dIK,&^;~Oazڳq,(~jTZ4iEmZϿo5VGEUSN=aƔ9sfQǕ:i. j7oݠWҖ_`BT:"gjΪRhhڶs }]6 8j׬K=;h٪%oRil<Ǐ43vQHeFd-jި5 ,դ ~/-Ek]M7n^<\R/k?C;wm(TNt5Ki |يcd@.ޝbpiDzLH'ѼU2VRzQOd~9K7i?NiYiwnSedRFp3( c,O?JubKVy?) ^Q>s(e[7Ygepb"3id-/+qd?.c(ųBVN=E8(E__|-U n͜:3@cfʘG/g!q'McCHrm8Cxjmcm\tJ{wm%SYK!k 7{̕3c=hlT-s"'_xB-ġKRVZ|z$ʕ)Os h={v5?"&eϟϚ4hF#)ۘ!Q1i۷B)Y9鳧hԧ{?+ޛ89~)d,@q $1 //<B3X E9>Aݽ |RP6J+Ib ц)SfuRxP$x?f/I;F)_O-"Wh1ʞ-;XLpϙ#<؉3߫J,y<}"EDW_s3ę=u5kV͘~:## oNE?]< ~WvzO<[sۓciB:zS7jN ,(t;9(MtL~6 dٴX d&!dG=ȤTȺ{x%.ZO+Vp2.*wiVU!TkԂ/Ј$ɹИCgeA): *tud͠>Η{!cL6VfAC}W¥a-} 4:4XJ7̬e˩d=ϡ=}Gl2GR+]rIdaK1~8 ~0*>\D\ev))8 x빳i"Hk԰ikѾ2Mn{J!~nBӵSiJ1%{Q WB©x0ΟOӢu:v:A`;$ʝ+;0ǒ 9+z9 jOkAXr>qď}!GQDJ+/ 7wZw$#&L Q)%/ʔ),UXBƌ* _UO\s{7_ʝ+}Z3&b(_qMR;s  ;i情ӱ(alSI'zSO QGXW&Mki8?b~'Rޏ( !K<"rQtjM_&N'bU YnkeSypԃLJ,}I 3S,ʄF'I$!/ei Y,M {wPhdH|޼y#R2sP}ͤ"D'eW^ &ja4|d&.geE%LgNn!epQ%+9ef[c\2G^^,;,Ik7&wԬQ$X$/7_?,> LOS"'-N|B#^4l~ Y@};_Or82d=BUm7>#ĥI\h<v?ؗy Iftyzx_L)d%Nϩh1ď!x*\Sx._DSNO1)lԸ~SQ=nbya]5VW?uk* O|Z(s6ˈhwR~p4K$,d_pQtjsޜ>i+QS/i Yiuf1tEBvԃ%,c`um,ceѭCi;RhcA+&~p}\:O##-۰Wo`O)q|S,P/lvkSG#d)׆gx_橜[u/OsGB鳧Bpoڰ|αLƞ-cѓ? 7}ukBr~dR*ď?zJ<˚qd&595VF'β!d?uFiH dy(9\-G=ȤTxH^z,pEvBgI/PF@$#/G8|>ʑeNn^Kk>!K 8Bg !WZ5i^c?CB!YdYRȒY MY=i{5_i"ݖ6]l%dxLA8"[^Ikq5mԜFyN%.:sqM[Ǵ21?{ e44fY?,H dDG=ȤTȒ1}QSēbAGaEPيUxϨhk$KDxn\Lğʘ1:.[8| ̸Hl!S?m\*Z[% IEּ[4YK5j'Bt|wXdB= Yn,`URWʖ.'*P,JRZd-j׬kHXx@kbcR}PAzcqZ*dIz]6-9^c,u?uFiH dy(9YdR"dԨJ:APr+[ַ=5jֆB)N%oks,|qqѡd{l5f{[ю- ڿg'EQ=m^4ʽ?]|`,v=b08vX#Oāuϒnw-dSE`}C.SINiګ%ٿ&M6-'qB o(F)\F{\ >zH#\ʵ+cdqL&#ɟJ%ЌݺsKgAKZzW쥉SGhr4ʕ3I HcERЛ:q&輾f)u-T}A-)dI7F_C1x\XiX%Y:OݨQYic1tN9u Q2 Y|ak,~9SfW&Iw8C.k21_M9Cwn^ﶈ uâsρI.GQéR/ga&E&}Y ߴ9+a6&g-TZ:~@}d(9Ȭ,Z$/I+Y n#ris{|UȒ 3'&<{=Y #\e-FR̙Ug-dk1)&%'xfNS_UPEkI:1X$1&dISIrMQђו\wn 'k Yᬅa:ăFz'Zk,9)fBNpe-ko߁4+n^yL9,un( i1E:'!Kpԃ1!{n WHz!FP#5R`1- .*~e"ǍO!ܾIQr {4>&Qj/!%orD?x m:̄RxHd~̸Ign\L^Cm;e8׶Mk(2dsXL"ΓWY߻C~ԺCwMǏ`WvZ*"i(r V s#ACxк;x{ٲkڟ# B ;JDZDaй}Wpq f΋*,9u͚@;$auυK)8<~V}y si5)<:spDZv?R8+"8JcciEM4aT Aў~AP B]xDj\,a+̌}RKѸ "C#?>jfok Y˕4ύ]8GFRn}){Dl9%lXE9+ca9H́n@.ZrENv-'hDXEY@:OݨQYic1tNBe (.#FSB 3>h Bd)Sҩ{?iıغe[xktu*Is&Qth^ )W]Wڔ7Q?X׵#|ܼ~ S""?N9aK/g!2,LAU^t WA>>r I2f˗sf?x,X0{wnS-Z(;c1X} "J@>z}sF'N^Ofex׆k?d=9B c߭eRF)'dooc)pڴ[Lrk:}(OٵFwu!۾Bc4>+UVsoL86@ IDATqɾߴ8*iDzLKDQ5kV߾a,dm>wnוvAx["K-XܸuFPdnzRú%|ԌWX^:O߬\L[vnk;Q]4"",uPn( i1E:'!KAF?vL Lh#B= Y?uPlIB-m{YhVE6$Uh4j޺蚤'2; 2e })E @2pSA dْ> ijpZTlCnK D*UpbwϞQ."y lSuM!K4aԭCi%YAB"gaZdېgT&6YM!ˬiE!@2R2J- @Ȳ%} "`9 j c58X̧#2? Pony{ dh:2@ @J,=J- @Ȳ%} "`9 j c5N?YV_h-Y?uPlIB-m{YhVE `uAȲ@ n @R78 `KlImEBôZ58X -*B՗tKBO?[eKh,D/rժAjhQ1X[M=4ؒ,[G `!~0V 2VCA}e%@ nqSA dْ> ijpZT V'`,/4,uS:~( $!˖6X_,4LUТb:{ dY} H dzCi%YAB"gaZd !K @%!KJ- @Ȳ%} "`9 j c5N?YV_h-Y?uPlIB-m{YhVE `uAȲ@ n @R78 `KlImEBôZ58X -*B՗tKBO?[H"dٲ3h@@UNK˃L:> i"dEA@:%0,{:aJ=xuҮ#"gR$25 !˚A,qC)'iF'`B@@@-Yv1pke-@@@ W   `q, ! @@@,LB:xKBX, Eu    ! k@@@CBuVEX, Eu    ,,0Y@@@@BX,pE    ,0Y@@@@-YX    `a, Ձ,YZA@@@YX    `a, Ձ[@@@@ dY(YX    `,@@@@ dY( da) @Ȳ0PT   @@@@: dY+jXda  @Ȳ0PT   o @R ea@@@ da u@ȲW     ea@@@   &!@Q@e@@@`5   &!@Q%! K@@@,LB:5   !!:\Q+" k@@@,LB:xKBX, Eu    ! k@@@CBuVEX, Eu    ,,0Y@@@@BX,pE    ,0Y@@@ޝGGQy$+&>è$(Ae,@4$a QbЀ0"/^(^\`@*ʢQyc$Iw*U ].Y<   @ d50(! Y<  5Yָ*  ,@@@@x@@ j`PC@x@@kqU@om@fVd\G@ d!@:2 o dy@, Ȳ&@0ɝ5@h W#Ȋ:}! ']# ]_f rn9Y<  EY, `R $ #`At  rw}8W ˹cd   dYK I,܎Y65 [ ev \,֎#@3 X$@e,"&Lr;6 dوO  n,wח!s[;FA  `AE4 2 (@e#>]# ]_f rn9Y<  EY, `R $ #`At  rw}8W ˹cd   dYK I,܎Y65 [ ev \,֎#@3 X$@e,"&Lr;6 dوO  n,wח!s[;FA  `AE4 2 (@e#>]# ]_f rn9Y<  EY, `R $ #`At  rw}8W ˹cd   dYK I,܎Y65 [ ev \,֎#@3 X$@e,"&Lr;6 dوO  n,wח!@*++%!!tY ilc@pA+@ !C瓤zwOUo:nDv,K@@Yn,B;Tddeee+Rg{$u@x׎eOh*mo u)w+ _`=}oر_;{5X4$r??Ėu"!T, DS +^PӭOEiIL-NI?sVʄQR`/LS[MxΘ*+n;zʇ Sf  MhjxIX8H-/Yj3EU'OH٢"Yr4<=\w# 4a,Ӓ5XP+s|Y$  Y,ϖ#WeEhd,ڪVh^壶^lYj K3gΜ@@ dy'lUVAV:v,M)Yrs[󢼵/ͺ+kw?h\?>$oozJGdLTm/ܧJq^_'*d-v;Jҥ[9sM?1n{eUxDp⎻zKN~\x%<)/#ׯm?2^Z]wCmjj믾,w~Wuvyh4$E eëkCۤwdYƙg^ھpEp  AV`\D P۪-*92ӗ.gȄ'%..N_l?BW͚_!'Ov#?Tْxȏ!vlסu7$M/HtH C2˗.kV=/8\Fi?> _:uJKgq~)(w}<6z=rJeY*[rQ}^{}J=OoNByLU53kک{{ƤM#'H\ƺ#JHh*Ce.ÇcO+ynj_B!):L‡ Uf  `A-tkUV@ +C[ew]=&ˡJ!ɞ:QTFt#|TNJꡌ|9߅ZXBZ*CGHJF.(2:ug#ݓ):h-X$ əWO?3?7,5~Tش1Nq^Ji}c;=wU˓^}a*zۻT ԛ&՘2 tHvI9Q#mBzW=SE@h 45VA"o;KOw|r'OXݣ&m"z*tӫο@ fdJ6dUa͸yD=Ym/To{2{So]|iK?|PN%~*ŋ_kZ]^Eml/E%/;omLAՔBZ#,=   @ DPX>OԊ,.d8A֯υ{۶Ȼo \In{F9Og ]gIUyum`\Q叝эqo:r^!xq) #"Ȋ_ @@@6p{ BW$eGϚ9Od=*+C-aPۛ;"K]i{FodƴVe d0l|bKM[ Y+nݯ>Aj7d^I{I@@@k5V k~` t3 S&d暑ʧkokī< u{} 6Mշ d O}dz~A k6ͷ<_>AV^YY>\   Vc`ddv,;?. _ndj̬IOUtO? K.VdշP^`cll+ܲ́jjCG=Vpvczs〇G.r_DN:=ܷ7YG(="lgdն%l- :@@@ '" d?Oyv~ݽ{ַ/3̴6IWn٪=FP]=M1:ut[ OdSs&0&m.ܷkQ{' ^q @@@+ fB/~*ede5ΈRMɜ@ TS{_j3kmyq`GmS4^D#/'CnwL̕fͯe.XRRB$UROmTaV-FVsD:Ӳ1Nލ-{v _;P>`=<¼rQ>Y^6sE@@.`ƪO ];ꬼ _rJ|G:\>|o];rz{٥"^EÓ{?3t!FɾW{M@E3S/>^#ҩM-plsTd>LIޤ3d4k({v@y S;mjëkLc͚_WI"CGN}d?y>Y^2sD@@\ Vc IRRRgŷ TrczQ﮲].xWdLR02Y2ltqɓ̈́s*7!֧ VfmڰN^Z^.۷n_~>w?<xZVW3U&1VRvWyz^Y*j:yiټ5IHh^Ǫ滪\ܸzvg/=.R,08 gEֱ_ e%g:#,...&O2@@@`z^ Lcр_SgMO   1 @EpbhYQF   %(A((AVlԁQ   DI +J. +6 Ju`   Q Ȋ˺!ȊdF   @M7X @e*M"  ĮAVֆ!PAV]B|   *,WxL cg   0' d9z@@@ bɸ ȊR0@@@hdEC>F WZE@@Q- B 0@@@=Y%3AjΌ@@@O dyLY/ G@@L +2/F b@@@r,ˉ,a@@@X ȊŪ0& ω@@@\"@B2 O dyL@@Awk̝/@2@@@"RbL + p@@@ Ȳ֗R J]F@@9+ B l@@@ Yn"sAW+ϼ@@@ dyLY(#@@@W +\)C b&@@@B, qi,y@@@ Ȋz0" ȊDk@fCIDAT@@/@2 dyL@@AΜ"@J2@@@b"bR +& @@@0,ڧ]^3gΜz@@@@^,{TmhDIENDB`labelled/vignettes/look_for.Rmd0000644000176200001440000001150414737244525016322 0ustar liggesusers--- author: "Joseph Larmarange" title: "Generate a data dictionary and search for variables with `look_for()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Generate a data dictionary and search for variables with `look_for()`} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ## Showing a summary of a data frame ### Default printing of tibbles It is a common need to easily get a description of all variables in a data frame. When a data frame is converted into a tibble (e.g. with `dplyr::as_tibble()`), it as a nice printing showing the first rows of the data frame as well as the type of column. ```{r message=FALSE} library(dplyr) ``` ```{r} iris %>% as_tibble() ``` However, when you have too many variables, all of them cannot be printed and their are just listed. ```{r} data(fertility, package = "questionr") women ``` Note: in **R** console, value labels (if defined) are usually printed but they do not appear in a R markdown document like this vignette. ### `dplyr::glimpse()` The function `dplyr::glimpse()` allows you to have a quick look at all the variables in a data frame. ```{r} glimpse(iris) glimpse(women) ``` It will show you the first values of each variable as well as the type of each variable. However, some important informations are not displayed: - variable labels, when defined; - value labels for labelled vectors; - the list of levels for factors; - the range of values for numerical variables. ### `labelled::look_for()` `look_for()` provided by the `labelled` package will print in the console a data dictionary of all variables, showing variable labels when available, the type of variable and a list of values corresponding to: - levels for factors; - value labels for labelled vectors; - the range of observed values in the vector otherwise (if `details = "full"`). ```{r} library(labelled) look_for(iris) look_for(women) ``` Note that `lookfor()` and `generate_dictionary()` are synonyms of `look_for()` and works exactly in the same way. If there is not enough space to print full labels in the console, they will be truncated (truncation is indicated by a `~`). ## Searching variables by key When a data frame has dozens or even hundreds of variables, it could become difficult to find a specific variable. In such case, you can provide an optional list of keywords, which can be simple character strings or regular expression, to search for specific variables. ```{r} # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") # Look_for will take variable labels into account look_for(women, "read", "level") ``` By default, `look_for()` will look through both variable names and variables labels. Use `labels = FALSE` to look only through variable names. ```{r} look_for(women, "read") look_for(women, "read", labels = FALSE) ``` Similarly, the search is by default case insensitive. To make the search case sensitive, use `ignore.case = FALSE`. ```{r} look_for(iris, "sepal") look_for(iris, "sepal", ignore.case = FALSE) ``` ## Level of details If you just want to use the search feature of `look_for()` without computing the details of each variable, simply indicate `details = "none"` or `details = FALSE`. ```{r} look_for(women, "id", details = "none") ``` If you want more details (but can be time consuming for big data frames), indicate `details = "full"` or `details = TRUE`. ```{r} look_for(women, details = "full") look_for(women, details = "full") %>% dplyr::glimpse() ``` ## Advanced usages of `look_for()` `look_for()` returns a detailed tibble which is summarized before printing. To deactivate default printing and see full results, simply use `dplyr::as_tibble()`, `dplyr::glimpse()` or even `utils::View()`. ```{r, eval=FALSE} look_for(women) %>% View() ``` ```{r} look_for(women) %>% as_tibble() glimpse(look_for(women)) ``` The tibble returned by `look_for()` could be easily manipulated for advanced programming. When a column has several values for one variable (e.g. `levels` or `value_labels`), results as stored with nested named list. You can convert named lists into simpler character vectors, you can use `convert_list_columns_to_character()`. ```{r} look_for(women) %>% convert_list_columns_to_character() ``` Alternatively, you can use `lookfor_to_long_format()` to transform results into a long format with one row per factor level and per value label. ```{r} look_for(women) %>% lookfor_to_long_format() ``` Both can be combined: ```{r} look_for(women) %>% lookfor_to_long_format() %>% convert_list_columns_to_character() ``` labelled/vignettes/approaches.drawio0000644000176200001440000000270014357761455017402 0ustar liggesusers7VlNc5swEP01PqZjIP46+iNOM01m0ubQppeODAsoFVpGyDbk11dgYcDErps4CY57SbRPK4He29VKuGWNg/hSkNC/QQdYy2w7ccuatEzTaPcH6l+KJCtk0NWAJ6ijnQrgjj5CPlKjc+pAVHGUiEzSsArayDnYsoIRIXBZdXORVZ8aEg9qwJ1NWB39Th3pr9C+2Svwz0A9P3+yka8vILmzXknkEweXJci6aFljgShXrSAeA0vJy3lZjZtu6V2/mAAu9xlw05ldX329nRpB+CWaX0WTQV+edfS7ySRfMDhq/dpEIX30kBN2UaAjgXPuQDprW1mFzzViqEBDgQ8gZaLFJHOJCvJlwHQvxFT+KLXv06k+dbQ1ifXMmZHkBpci+VE2SqNSsxiWWfm4OkuauAjnwoYd1OTRRoQHcoefufJLeSs9QGtwCRiAeh/lIIARSRfVuCI6PL21X6GgamgR/0FQPe+CsLl+0oRIopCrIFRS1eSuirn0qYS7kGTELFVGV4VzKWNjZCiysZZDoO/aCo+kwN9Q6unafZi5u+hfgJAQ7yRM957r/Emq5rLIRiNPMb+UibnfwRnuHkvKPD/0zT1D32pU6Ju10B8jV3EWUeRZ2WilcdJlakWjmVAtL225xJYooqxvqv7yeQCC2gdNFDCcDvSeSpRBt2eR7mESxWw3LFN6Hz9TrD0z5bxRmWJtKxI2A8Ip9+ppoh0E2OhkDkdXR6xuw9LjfJsKRMV7EtHoCEnumA0j2fh/wt3KzWDPzctoVp0ffPQj7jpD3i9rjqZ0v8O9cN+LodGsom/Ur4andT7ePAA0IM2O5i75Dmm279E6j+u3T7PHe2/xazxdkN5DspjNgntkg/wDTBMk7b2mpht0H05kanozJ7bFz6V70x59c70lnp29VNJs6FAIkpQcQqRcRqWZb1OgtGFs1uXOxhfWv/jnF/IinFZvUATXeikv2ENO8jK3+a2jAXv5KVznGkBzv0bzMAwFEttX6LDGsVqorBJZJYwjhw12NUQY9bgyGbjpDClp1CZsqOGAOk62eT+lW1VZF7nMC8D2zfLZ14V9NTFfTZP6jaykyegUNTHabyjK9or51HYE8ZHekmubUf/1NiNlFr/Mrkp18fu2dfEHlabelled/vignettes/packed_columns.Rmd0000644000176200001440000000465014466735327017507 0ustar liggesusers--- author: "Joseph Larmarange" title: "Variables labels and packed columns" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Variables labels and packed columns} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The **tidyr** package allows to group several columns of a tibble into one single df-column, see `tidyr::pack()`. Such df-column is itself a tibble. It's not currently clear why you would ever want to pack columns since few functions work with this sort of data. ```{r} library(tidyr) d <- iris %>% as_tibble() %>% pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) str(d) class(d$Sepal) ``` Regarding variable labels, you may want to define a label for one sub-column of a df-column, or eventually a label for the df-column itself. For a sub-column, you could use easily `var_label()` to define your label. ```{r} library(labelled) var_label(d$Sepal$Length) <- "Length of the sepal" str(d) ``` But you cannot use directly `var_label()` for the df-column. ```{r} var_label(d$Petal) <- "wrong label for Petal" str(d) ``` As `d$Petal` is itself a tibble, applying `var_label()` on it would have an effect on each sub-column. To change a variable label to the df-column itself, you could use `label_attribute()`. ```{r} label_attribute(d$Petal) <- "correct label for Petal" str(d) ``` On the other hand, `set_variable_labels()` works differently, as the primary intention of this function is to work on the columns of a tibble. ```{r} d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") str(d) ``` This is equivalent to: ```{r} var_label(d) <- list(Sepal = "Label of the Sepal df-column") str(d) ``` To use `set_variable_labels()` on sub-columns, you should use this syntax: ```{r} d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) str(d) ``` If you want to get the list of variable labels of a tibble, by default `var_label()` or `get_variable_labels()` will return the labels of the first level of columns. ```{r} d %>% get_variable_labels() ``` To obtain the list of variable labels for sub-columns, you could use `recurse = TRUE`: ```{r} d %>% get_variable_labels(recurse = TRUE) d %>% get_variable_labels( recurse = TRUE, null_action = "fill", unlist = TRUE ) ``` labelled/vignettes/missing_values.Rmd0000644000176200001440000001445514357761455017554 0ustar liggesusers--- author: "Joseph Larmarange" title: "About missing values: regular NAs, tagged NAs and user NAs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{About missing values: regular NAs, tagged NAs and user NAs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- In base **R**, missing values are indicated using the specific value `NA`. **Regular NAs** could be used with any type of vector (double, integer, character, factor, Date, etc.). Other statistical software have implemented ways to differentiate several types of missing values. **Stata** and **SAS** have a system of **tagged NAs**, where NA values are tagged with a letter (from a to z). **SPSS** allows users to indicate that certain non-missing values should be treated in some analysis as missing (**user NAs**). The `haven` package implements **tagged NAs** and **user NAs** in order to keep this information when importing files from **Stata**, **SAS** or **SPSS**. ```{r} library(labelled) ``` ## Tagged NAs ### Creation and tests **Tagged NAs** are proper `NA` values with a tag attached to them. They can be created with `tagged_na()`. The attached tag should be a single letter, lowercase (a-z) or uppercase (A-Z). ```{r} x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) ``` For most **R** functions, tagged NAs are just considered as regular NAs. By default, they are just printed as any other regular NA. ```{r} x is.na(x) ``` To show/print their tags, you need to use `na_tag()`, `print_tagged_na()` or `format_tagged_na()`. ```{r} na_tag(x) print_tagged_na(x) format_tagged_na(x) ``` To test if a certain NA is a regular NA or a tagged NA, you should use `is_regular_na()` or `is_tagged_na()`. ```{r} is.na(x) is_tagged_na(x) # You can test for specific tagged NAs with the second argument is_tagged_na(x, "a") is_regular_na(x) ``` Tagged NAs could be defined **only** for double vectors. If you add a tagged NA to a character vector, it will be converted into a regular NA. If you add a tagged NA to an integer vector, the vector will be converted into a double vector. ```{r, error=TRUE} y <- c("a", "b", tagged_na("z")) y is_tagged_na(y) format_tagged_na(y) z <- c(1L, 2L, tagged_na("a")) typeof(z) format_tagged_na(z) ``` ### Unique values, duplicates and sorting with tagged NAs By default, functions such as `base::unique()`, `base::duplicated()`, `base::order()` or `base::sort()` will treat tagged NAs as the same thing as a regular NA. You can use `unique_tagged_na()`, `duplicated_tagged_na()`, `order_tagged_na()` and `sort_tagged_na()` as alternatives that will treat two tagged NAs with different tags as separate values. ```{r} x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x %>% print_tagged_na() unique(x) %>% print_tagged_na() unique_tagged_na(x) %>% print_tagged_na() duplicated(x) duplicated_tagged_na(x) sort(x, na.last = TRUE) %>% print_tagged_na() sort_tagged_na(x) %>% print_tagged_na() ``` ### Tagged NAs and value labels It is possible to define value labels for tagged NAs. ```{r} x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) x ``` When converting such labelled vector into factor, tagged NAs are, by default, converted into regular NAs (it is not possible to define tagged NAs with factors). ```{r} to_factor(x) ``` However, the option `explicit_tagged_na` of `to_factor()` allows to transform tagged NAs into explicit factor levels. ```{r} to_factor(x, explicit_tagged_na = TRUE) to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE) ``` ### Conversion into user NAs Tagged NAs can be converted into user NAs with `tagged_na_to_user_na()`. ```{r} tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) ``` Use `tagged_na_to_regular_na()` to convert tagged NAs into regular NAs. ```{r} tagged_na_to_regular_na(x) tagged_na_to_regular_na(x) %>% is_tagged_na() ``` ## User NAs `haven` introduced an `haven_labelled_spss` class to deal with user defined missing values in a similar way as **SPSS**. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. ### Creation User NAs could be created directly with `labelled_spss()`. You can also manipulate them with `na_values()` and `na_range()`. ```{r} v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 v na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` NB: you cant also use `set_na_range()` and `set_na_values()` for a `dplyr`-like syntax. ```{r} library(dplyr) # setting value labels and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% set_value_labels(s2 = c(yes = 1, no = 2)) %>% set_na_values(s2 = 9) df$s2 # removing user NAs df <- df %>% set_na_values(s2 = NULL) df$s2 ``` ### Tests Note that `is.na()` will return `TRUE` for user NAs. Use `is_user_na()` to test if a specific value is a user NA and `is_regular_na()` to test if it is a regular NA. ```{r} v is.na(v) is_user_na(v) is_regular_na(v) ``` ### Conversion For most **R** functions, user NAs values are **still** regular values. ```{r} x <- c(1:5, 11:15) na_range(x) <- c(10, Inf) val_labels(x) <- c("dk" = 11, "refused" = 15) x mean(x) ``` You can convert user NAs into regular NAs with `user_na_to_na()` or `user_na_to_regular_na()` (both functions are identical). ```{r} user_na_to_na(x) mean(user_na_to_na(x), na.rm = TRUE) ``` Alternatively, if the vector is numeric, you can convert user NAs into tagged NAs with `user_na_to_tagged_na()`. ```{r} user_na_to_tagged_na(x) mean(user_na_to_tagged_na(x), na.rm = TRUE) ``` Finally, you can also remove user NAs definition without converting these values to `NA`, using `remove_user_na()`. ```{r} remove_user_na(x) mean(remove_user_na(x)) ``` labelled/vignettes/labelled.Rmd0000644000176200001440000003660114737244525016261 0ustar liggesusers--- author: "Joseph Larmarange" title: "Introduction to labelled" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Introduction to labelled} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The purpose of the **labelled** package is to provide functions to manipulate metadata as variable labels, value labels and defined missing values using the `haven_labelled` and `haven_labelled_spss` classes introduced in `haven` package. These classes allow to add metadata (variable, value labels and SPSS-style missing values) to vectors. It should be noted that **value labels** doesn't imply that your vectors should be considered as categorical or continuous. Therefore, value labels are not intended to be use for data analysis. For example, before performing modeling, you should convert vectors with value labels into factors or into classic numeric/character vectors. Therefore, two main approaches could be considered. ![Two main approaches](approaches.png){width=100%} In **approach A**, `haven_labelled` vectors are converted into factors or into numeric/character vectors just after data import, using `unlabelled()`, `to_factor()` or `unclass()`. Then, data cleaning, recoding and analysis are performed using classic **R** vector types. In **approach B**, `haven_labelled` vectors are kept for data cleaning and coding, allowing to preserved original recoding, in particular if data should be reexported after that step. Functions provided by `labelled` will be useful for managing value labels. However, as in approach A, `haven_labelled` vectors will have to be converted into classic factors or numeric vectors before data analysis (in particular modeling) as this is the way categorical and continuous variables should be coded for analysis functions. ## Variable labels A variable label could be specified for any vector using `var_label()`. ```{r} library(labelled) var_label(iris$Sepal.Length) <- "Length of sepal" ``` It's possible to add a variable label to several columns of a data frame using a named list. ```{r} var_label(iris) <- list( Petal.Length = "Length of petal", Petal.Width = "Width of Petal" ) ``` To get the variable label, simply call `var_label()`. ```{r} var_label(iris$Petal.Width) var_label(iris) ``` To remove a variable label, use `NULL`. ```{r} var_label(iris$Sepal.Length) <- NULL ``` In **RStudio**, variable labels will be displayed in data viewer. ```{r, eval=FALSE} View(iris) ``` You can display and search through variable names and labels with `look_for()`: ```{r} look_for(iris) look_for(iris, "pet") look_for(iris, details = FALSE) ``` ## Value labels The first way to create a labelled vector is to use the `labelled()` function. It's not mandatory to provide a label for each value observed in your vector. You can also provide a label for values not observed. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v ``` Use `val_labels()` to get all value labels and `val_label()` to get the value label associated with a specific value. ```{r} val_labels(v) val_label(v, 8) ``` `val_labels()` could also be used to modify all the value labels attached to a vector, while `val_label()` will update only one specific value label. ```{r} val_labels(v) <- c(yes = 1, nno = 3, bug = 5) v val_label(v, 3) <- "no" v ``` With `val_label()`, you can also add or remove specific value labels. ```{r} val_label(v, 2) <- "maybe" val_label(v, 5) <- NULL v ``` To remove all value labels, use `val_labels()` and `NULL`. The `haven_labelled` class will also be removed. ```{r} val_labels(v) <- NULL v ``` Adding a value label to a non labelled vector will apply `haven_labelled` class to it. ```{r} val_label(v, 1) <- "yes" v ``` Note that applying `val_labels()` to a factor will generate an error! ```{r, error = TRUE} f <- factor(1:3) f val_labels(f) <- c(yes = 1, no = 3) ``` You could also apply `val_labels()` to several columns of a data frame. ```{r} df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1) val_label(df, 1) <- "yes" val_label(df[, c("v1", "v3")], 2) <- "maybe" val_label(df[, c("v2", "v3")], 3) <- "no" val_labels(df) val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3) val_labels(df) val_labels(df) <- NULL val_labels(df) val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3)) val_labels(df) ``` ## Sorting value labels Value labels are sorted by default in the order they have been created. ```{r} v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA) val_label(v, 1) <- "yes" val_label(v, 3) <- "no" val_label(v, 9) <- "refused" val_label(v, 2) <- "maybe" val_label(v, 8) <- "don't know" v ``` It could be useful to reorder the value labels according to their attached values, with `sort_val_labels()`. ```{r} sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) ``` If you prefer, you can also sort them according to the labels. ```{r} sort_val_labels(v, according_to = "l") ``` ## User defined missing values (SPSS's style) `haven` (>= 2.0.0) introduced an additional `haven_labelled_spss` class to deal with user defined missing values. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. It is possible to manipulate this missing values with `na_values()` and `na_range()`. Note that `is.na()` will return `TRUE` as well for user-defined missing values. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` Since version 2.1.0, it is not mandatory to define at least one value label before defining missing values. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 x ``` To convert user defined missing values into `NA`, simply use `user_na_to_na()`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- user_na_to_na(v) v2 ``` You can also remove user missing values definition without converting these values to `NA`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- remove_user_na(v) v2 ``` or ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v na_values(v) <- NULL v ``` ## Other conversion to NA In some cases, values who don't have an attached value label could be considered as missing. `nolabel_to_na()` will convert them to `NA`. ```{r} v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3)) v nolabel_to_na(v) ``` In other cases, a value label is attached only to specific values that corresponds to a missing value. For example: ```{r} size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99)) size ``` In such cases, `val_labels_to_na()` could be appropriate. ```{r} val_labels_to_na(size) ``` These two functions could also be applied to an overall data frame. Only labelled vectors will be impacted. ## Converting to factor A labelled vector could easily be converted to a factor with `to_factor()`. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v to_factor(v) ``` The `levels` argument allows to specify what should be used as the factor levels, i.e. the labels (default), the values or the labels prefixed with values. ```{r} to_factor(v, levels = "v") to_factor(v, levels = "p") ``` The `ordered` argument will create an ordinal factor. ```{r} to_factor(v, ordered = TRUE) ``` The argument `nolabel_to_na` specify if the corresponding function should be applied before converting to a factor. Therefore, the two following commands are equivalent. ```{r} to_factor(v, nolabel_to_na = TRUE) to_factor(nolabel_to_na(v)) ``` `sort_levels` specifies how the levels should be sorted: `"none"` to keep the order in which value labels have been defined, `"values"` to order the levels according to the values and `"labels"` according to the labels. `"auto"` (default) will be equivalent to `"none"` except if some values with no attached labels are found and are not dropped. In that case, `"values"` will be used. ```{r} to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "l") ``` The function `to_labelled()` could be used to turn a factor into a labelled numeric vector. ```{r} f <- factor(1:3, labels = c("a", "b", "c")) to_labelled(f) ``` Note that `to_labelled(to_factor(v))` will not be equal to `v` due to the way factors are stored internally by **R**. ```{r} v to_labelled(to_factor(v)) ``` ## Other type of conversions You can use `to_character()` for converting into a character vector instead of a factor. ```{r} v to_character(v) ``` To remove the `haven_class`, you can simply use `unclass()`. ```{r} unclass(v) ``` Note that value labels will be preserved as an attribute to the vector. ```{r} remove_val_labels(v) ``` To remove value labels, use `remove_val_labels()`. ```{r} remove_val_labels(v) ``` Note that if your vector does have user-defined missing values, you may also want to use `remove_user_na()`. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" x remove_val_labels(x) remove_user_na(x) remove_user_na(x, user_na_to_na = TRUE) remove_val_labels(remove_user_na(x)) unclass(x) ``` You can remove all labels and user-defined missing values with `remove_labels()`. Use `keep_var_label = TRUE` to preserve only variable label. ```{r} remove_labels(x, user_na_to_na = TRUE) remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE) ``` ## Conditional conversion to factors{#unlabelled} For any analysis, it is the responsibility of user to identify which labelled numeric vectors should be considered as **categorical** (and therefore converted into factors using `to_factor()`) and which variables should be treated as **continuous** (and therefore unclassed into numeric using `base::unclass()`). It should be noted that most functions expect categorical variables to be coded as factors. It includes most modeling functions (such as `stats::lm()` or `stats::glm()`) or plotting functions from `ggplot2`. In most of cases, if data documentation was properly done, categorical variables corresponds to vectors where all observed values have a value label while vectors where only few values have a value label should be considered as continuous. In that situation, you could apply the `unlabelled()` method to an overall data frame. By default, `unlabelled()` works as follow: - if a column doesn't inherit the `haven_labelled` class, it will be not affected; - if all observed values have a corresponding value label, the column will be converted into a factor (using `to_factor()`); - otherwise, the column will be unclassed (and converted back to a numeric or character vector by applying `base::unclass()`). ```{r} df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)), d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), e = labelled_spss( c(1, 9, 1, 2), labels = c(No = 1, Yes = 2), na_values = 9 ) ) df %>% look_for() unlabelled(df) %>% look_for() unlabelled(df, user_na_to_na = TRUE) %>% look_for() unlabelled(df, drop_unused_labels = TRUE) %>% look_for() ``` ## Importing labelled data In **haven** package, `read_spss`, `read_stata` and `read_sas` are natively importing data using the `labelled` class and the `label` attribute for variable labels. Functions from **foreign** package could also import some metadata from **SPSS** and **Stata** files. `to_labelled` can convert data imported with **foreign** into a labelled data frame. However, there are some limitations compared to using **haven**: - For **SPSS** files, it will be better to set `use.value.labels = FALSE`, `to.data.frame = FALSE` and `use.missings = FALSE` when calling `read.spss`. If `use.value.labels = TRUE`, variable with value labels will be converted into factors by `read.spss` (and kept as factors by `foreign_to_label`). If `to.data.frame = TRUE`, meta data describing the missing values will not be imported. If `use.missings = TRUE`, missing values would have been converted to `NA` by `read.spss`. - For **Stata** files, set `convert.factors = FALSE` when calling `read.dta` to avoid conversion of variables with value labels into factors. So far, missing values defined in Stata are always imported as `NA` by `read.dta` and could not be retrieved by `foreign_to_labelled`. The **memisc** package provide functions to import variable metadata and store them in specific object of class `data.set`. The `to_labelled` method can convert a data.set into a labelled data frame. ```{r, eval=FALSE} # from foreign library(foreign) df <- to_labelled(read.spss( "file.sav", to.data.frame = FALSE, use.value.labels = FALSE, use.missings = FALSE )) df <- to_labelled(read.dta( "file.dta", convert.factors = FALSE )) # from memisc library(memisc) nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") nes1948 <- spss.portable.file(nes1948.por) df <- to_labelled(nes1948) ds <- as.data.set(nes19480) df <- to_labelled(ds) ``` ## Using labelled with dplyr/magrittr If you are using the `%>%` operator, you can use the functions `set_variable_labels()`, `set_value_labels()`, `add_value_labels()` and `remove_value_labels()`. ```{r} library(dplyr) df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% set_variable_labels(s1 = "Sex", s2 = "Question") %>% set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2)) df$s2 ``` `set_value_labels()` will replace the list of value labels while `add_value_labels()` will update it. ```{r} df <- df %>% set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9)) df$s2 df <- df %>% add_value_labels(s2 = c(No = 2)) df$s2 ``` You can also remove some variable and/or value labels. ```{r} df <- df %>% set_variable_labels(s1 = NULL) # removing one value label df <- df %>% remove_value_labels(s2 = 2) df$s2 # removing several value labels df <- df %>% remove_value_labels(s2 = 8:9) df$s2 # removing all value labels df <- df %>% set_value_labels(s2 = NULL) df$s2 ``` To convert variables, the easiest is to use `unlabelled()`. ```{r} library(questionr) data(fertility) glimpse(women) glimpse(women %>% unlabelled()) ``` Alternatively, you can use functions as `dplyr::mutate()` + `dplyr::across()`. See the example below. ```{r} glimpse(to_factor(women)) glimpse(women %>% mutate(across(where(is.labelled), to_factor))) glimpse(women %>% mutate(across(employed:religion, to_factor))) ``` labelled/data/0000755000176200001440000000000014357761455012750 5ustar liggesuserslabelled/data/spss_file.rda0000644000176200001440000000710114357761455015426 0ustar liggesusersBZh91AY&SYFλW)tWL AHdEDLi@GwX sK&CIMG=14hЧ MԛQT=@2Gd2A)C3Q@h&@hhd4 #!d &hL2b 2h 4Ѡdh4 '444j220hhb0&00ёijdjh 2@6ѐ214 242h0EFDhɪxL@iOQ4I<Qiihm&MO|bT&JvUW3r!uUMDIM2SPEZ]6P%B50h @$P(#c9.\\\r')p%ȹpN:9rsA$ ID$V R$%VBVR⩤EL&$](,R4R 9Ӈ*"D7ISrDD0&M 20g"y:4h:504ҋD)ϥ{#e_%bbjH hbyG_ q CMԕf\150oVu,R/@kzݖ7Ů1yyy4ȹ+YpȾrY6k4cSIܳ7;k5{%7X+i#YI*Ou: ٺu=z1Ⅷ$;$vDXC$>N{#$o;] LLiA&ĠM(.@K,u@E &$ʺ@0@4/L:AA QQM#L"/Cațݑ|Gs .cb:lm;@2l 'v|lMbE͂e`JZM68;[ns wNI(8N/Z gTXyXJ!b),CnZL} E+jIy7ޜ37onZfx9 DdA5 9} .ΏrD%/O:etLh.P*"*Lh!10"i"&](*WS^#vn/3߼6G )z#hEG w UFY;ϙ=~Wzj{s <(r zm/^*̜=+ Z zNfv nhTw HwOzTaL9Ht2s:3O]].].zwכz')cܓ0`;/hlD// ^\0FF-cږFxq!'5㳍ݤrHzy:;4NHG.:3 g 4UMie/u4FR,mwnSE@i2Zqb=}nƯQX-GcWʷi[ɶ۸גW8N݊41n-],WcW;:?GvոM%ccv"Ē] IH]bb*9 5]##E(H (,&Lf2F A36A32Lc.8g_q뮸ƺmM].A),DCjĠlH`fMHdXfDfe`Ҳ,k7kJQt7s떹5CRED%b &-&" ioUcF4j#QbhLIbTICn1 X+--+FFj\mFƴ 1{Hta<xX5c׿Akɔ"0b A߅l!t3AYd `]%ηc9WR'.t7v^@g(8! gOz`* @6gN[J#e?۽!Ld7c7^oWa%΍5%Od1 1IK?w7tq1O1L0̈) fEg6fFڟ,3?)Ȇ 2\J*9gCw610`j_NwB KW>' + ]p@DcΪS.p vlabelled/data/x_haven_2.0.rda0000644000176200001440000000024414357761455015447 0ustar liggesusersBZh91AY&SY﹕b@P@O@ ShhPHG 4Bʰh~4M$Ma `4hP0"Tⓕ>X̻] |՜h$4 ||aa]BBgTlabelled/data/x_spss_haven_2.0.rda0000644000176200001440000000034114357761455016515 0ustar liggesusersBZh91AY&SYHPPԀ0@`0`SAL 6i&HLh@ 0$T&i 8)&' $  z@hpj! v0CH `OT?\t\ZEvbErJ+>ٝL  ъg c<7>Fe4^R# 0 j,?H]B@~Cllabelled/data/dta_file.rda0000644000176200001440000000235314357761455015212 0ustar liggesusersBZh91AY&SYHk}B0MbYq !P-boM"i m@z4i=F'y'm  zFj2DڃC#AF'MILjhbhm@@Ѡ@"$P52=M4"i`#d4hh h FAL#!aL#!LA2`e14Qi4hh4hi 4ACLA"V?UFKL%_" ۸P!Hd@ЋPHb꨻pl\mIJ A$&8"gZKPPZmȋjWHp&g}5:B$ ATd& 8$(6/ . :78j @VR ؐV! @7(PmK X\% K -׆QfV(10xEA1.lQ.@_ Fqb& *ՠ^SO*bw7͔+{|&PPs0!9sx{  Q,^d(؅Pi,'!3{~m{v1s kZq RMc^RBa{hiɺu}31q])HnʒhnI58QDN n~w%6;QnCM2!9LrE1[{yVJimIxsFv^2 Y4i Gi_3aUh*Q YսY"v}[uj%^H} "PtRz:SeE AEYKׄ#VI@.堸`PP:E<׍>&/!&sб>F5+$ӏ%") export("label_attribute<-") export("na_range<-") export("na_values<-") export("val_label<-") export("val_labels<-") export("var_label<-") export(add_value_labels) export(convert_list_columns_to_character) export(copy_labels) export(copy_labels_from) export(drop_unused_value_labels) export(duplicated_tagged_na) export(foreign_to_labelled) export(format_tagged_na) export(generate_dictionary) export(get_label_attribute) export(get_na_range) export(get_na_values) export(get_value_labels) export(get_variable_labels) export(is.labelled) export(is_prefixed) export(is_regular_na) export(is_tagged_na) export(is_user_na) export(label_attribute) export(labelled) export(labelled_spss) export(look_for) export(look_for_and_select) export(lookfor) export(lookfor_to_long_format) export(memisc_to_labelled) export(na_range) export(na_tag) export(na_values) export(names_prefixed_by_values) export(nolabel_to_na) export(order_tagged_na) export(print_labels) export(print_tagged_na) export(recode_if) export(remove_attributes) export(remove_labels) export(remove_user_na) export(remove_val_labels) export(remove_value_labels) export(remove_var_label) export(set_label_attribute) export(set_na_range) export(set_na_values) export(set_value_labels) export(set_variable_labels) export(sort_tagged_na) export(sort_val_labels) export(tagged_na) export(tagged_na_to_regular_na) export(tagged_na_to_user_na) export(to_character) export(to_factor) export(to_labelled) export(unique_tagged_na) export(unlabelled) export(update_labelled) export(update_value_labels_with) export(update_variable_labels_with) export(user_na_to_na) export(user_na_to_regular_na) export(user_na_to_tagged_na) export(val_label) export(val_labels) export(val_labels_to_na) export(var_label) import(rlang) importFrom(dplyr,`%>%`) importFrom(dplyr,recode) importFrom(dplyr,where) importFrom(haven,format_tagged_na) importFrom(haven,is.labelled) importFrom(haven,is_tagged_na) importFrom(haven,labelled) importFrom(haven,labelled_spss) importFrom(haven,na_tag) importFrom(haven,print_labels) importFrom(haven,print_tagged_na) importFrom(haven,tagged_na) importFrom(lifecycle,deprecate_soft) labelled/NEWS.md0000644000176200001440000002346614737253136013142 0ustar liggesusers# labelled 2.14.0 **New features** * in `update_variable_labels_with()`, it is now possible to access the variable name inside `.fn` by using `names()` (#163) * `var_label()` gets new options `"na"` and `"empty"` for `null_action` **Improvements** * systematic use of `{cli}` for errors, warnings and messages (#167) # labelled 2.13.0 **New features** * add a `null_action` argument to `val_labels()`, `val_label()` and a `.null_action` argument to `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` (#145) * new functions `update_variable_labels_with()` and `update_value_labels_with()` allowing to update variable/value labels with a custom function (#153) **Bug fix** * avoid an error with `print.look_for()` when console pane is physically shrunk too small (#148) * fix in `recode.haven_labelled()` when `.x` contains `NA` and `.combine_value_labels = TRUE` (#151) * produce an error when trying to assign value labels to a Date vector (#156) # labelled 2.12.0 **New features** * support of variable labels for packed columns, see dedicated vignette (#142) * new helpers `label_attribute()`, `get_label_attribute()` and `set_label_attribute()` to manipulate the "label" attribute on any object (#142) * new functions `get_variable_labels()`, `get_value_labels()`, `get_na_values()` and `get_na_range()` identical to `var_label()`, `val_labels()`, `na_values()` and `na_range()`, respectively * `to_character()` method for data frames (#140) # labelled 2.11.0 **Improvements** * `set_value_labels()`, `add_value_labels()`, `remove_value_labels()`, `set_variable_labels()`, `set_na_range()` and `set_na_values()` can now be applied on a vector (#126) * new argument `null_action` for `var_label()` when applied on a data frame (#131) * `look_for()` now returns `"missing"` (number of `NA`s) by default (#133) **Bug fixes** * bug fix in `print.look_for()` (#135) * bug fix in `unlabelled()` for classic vectors, now remained unchanged (#137) # labelled 2.10.0 * `look_for()` now accepts `survey` objects (#121) # labelled 2.9.1 * improved error messages for missing variable names (#118, @ajb5d) * better implementation of `look_for()` when no keyword is provided (#116) * bug fix in `user_na_to_tagged_na()` (#114) # labelled 2.9.0 **look_for() improvements:** * new function `look_for_and_select()` (#87) * `look_for()` can now search within factor levels and value labels (#104) **improvements for tagged NAs:** * better printing of value labels (#89) * new functions `user_na_to_tagged_na()`, `tagged_na_to_user_na()` and `tagged_na_to_regular_na()` * new option `explicit_tagged_na` in `to_factor()` and `to_character()` * new functions `unique_tagged_na()`, `duplicated_tagged_na()`, `order_tagged_na()`, `sort_tagged_na()` (#90, #91) **other improvements:** * new functions `is_user_na()` and `is_regular_na()` * new set of unit tests (#99) * trying to apply a value label, `na_range()` or `na_values()` to a factor will now produce an error * bug fix in `foreign_to_labelled()` for Stata files (#100) # labelled 2.8.0 * new helper `recode_if()` for recoding values based on condition, variable and value labels being preserved (#82) * `look_for()` could be time consuming for big data frames. Now, by default, only basic details of each variable are computed. You can compute all details with `details = "full"` (#77) * printing of `look_for()` results has been updated and do not rely anymore on `pillar` (#85) * `to_labelled()` can properly manage factors whose levels are coded as "[code] level", as produced by `to_factor(levels = "prefixed")` (#74 @courtiol) * new function `is_prefixed()` to check if a factor is prefixed * bug fix for `na_range<-` and `na_values<-` when applied to a data.frame (#80) # labelled 2.7.0 * a `.values` argument has been added to `set_na_values()` and `set_na_range()`, allowing to pass a list of values * a `.strict` option has been added to `set_variable_labels()`, `set_value_labels()`, `add_value_labels()`, `remove_value_labels()`, `set_na_values()` and `set_na_range()`, allowing to pass values for columns not observed in the data (it could be useful for using a same list of labels for several data.frame sharing some variables) (#70) * `copy_labels()` is less restrictive for non labelled vectors, copying variable label even if the two vectors are not of the same type (#71) * a `.strict` option has been added to `copy_labels()` (#71) # labelled 2.6.0 * `look_for()` has been redesigned: - `look_for()` now returns a tibble - columns with multiple values for each variable are now stored as named lists - a print method has been added for a clearer presentation of results in the console - use `lookfor_to_long_format()` to convert results with one row per factor level and per value label - use `convert_list_columns_to_character()` to convert list columns to simpler character vectors - `generate_dictionary()` is an equivalent of `look_for()` * `set_variable_labels`, `set_value_labels`, `add_value_labels`, and `remove_value_labels` now accept "tidy dots" (#67 @psanker) * new function `names_prefixed_by_values()` to get the names of a vector prefixed by their corresponding value # labelled 2.5.0 * new `.keep_value_labels` argument for `recode.haven_labelled()` * new `.combine_value_labels` argument for `recode.haven_labelled()` (#61) * new `drop_unused_value_labels()` method * an additional `.labels` argument for `set_value_labels()` * `user_na_to_na` argument has been added to `to_character.haven_labelled()` * `%>%` is now imported from `dplyr` * a cheatsheet has been added (#47) * internal documentation is now using **roxygen2** markdown support # labelled 2.4.0 * fixes for haven 2.3.0 (#59 by @hadley) * correct re-export of functions from `haven` * `update_labelled()` has been improved to allow to reconstruct all labelled vectors created with a previous version of `haven` # labelled 2.3.1 * an additional argument `keep_var_label` for `remove_labels()` * bug fix for `unlabelled()` when applied on a vector * when using `unclass = TRUE` with `to_factor()`, attributes are not removed anymore # labelled 2.3.0 * new function `unlabelled()` # labelled 2.2.2 * bug fix for `look_for()` (#52 by @NoahMarconi) * bug fix in `val_labels_to_na()` documentation # labelled 2.2.1 * bug fix for `na_range()` and `na_values()`: variable labels are now preserved (#48, thanks to @mspittler) # labelled 2.2.0 * new function `copy_labels_from()`, compliant with `dplyr` syntax * `update_labelled()` is now more strict (#42 by @iago-pssjd) * new functions `look_for()` and `lookfor()` imported from `questionr` (#44) * new `unlist` option for `var_label()` * `tagged_na()` and similar functions are now imported from `haven` # labelled 2.1.0 * `var_label()`, applied to a data.frame, now accepts a character vector of same length as the number of columns. * `set_variable_labels` has a new `.labels` argument. * New `unclass` option in `to_factor()`, to be used when `strict = TRUE` (#36) * Following `haven` version 2.1.0, it is not mandatory anymore to define a value label before defining a SPSS style missing value. `labelled_spss()`, `na_values()` and `na_range()` have been updated accordingly (#37) # labelled 2.0.2 * `to_factor()` bug fix then applied on a data.frame (#33) # labelled 2.0.1 * `update_labelled()` bug fix then applied on a data.frame (#31) # labelled 2.0.0 ## BREAKING CHANGE * Following version 2.0.0 of `haven`, `labelled()` and `labelled_spss()` now produce objects with class "haven_labelled" and "haven_labelled_spss", due to conflict between the previous "labelled" class and the "labelled" class used by `Hmisc`. * A new function `update_labelled()` could be used to convert data imported with an older version of `haven` to the new classes. ## Other changes * `user_na_to_na` option added to `to_factor()` * `foreign_to_labelled()` now import SPSS missing values (#27) * a `strict` argument added to `to_factor()` (#25) * `remove_attributes()` preserve character vectors (#30) # labelled 1.1.0 * extend `dplyr::recode()` method to be compatible with labelled vectors. * `copy_labels()` now copy also `na_range` and `na_values` attributes. * new method `remove_attributes()` # labelled 1.0.1 * bug fix: argument `drop_unused_labels` could now be used with `to_factor.data.frame()` * new labels argument for `to_labelled()` method when applied to a factor * bug fix: appropriate column names with `data.frame` (#20) # labelled 1.0.0 * now imports `haven` * new function to deal with user defined missing values (SPSS style): `na_values()`, `na_range()`, `set_na_values()`, `set_na_values()`, `remove_user_na()`, `user_na_to_na()`. * `remove_labels()` has been updated. # labelled 0.2.3 * new functions `set_variable_labels()`, `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` compatible with `%>%`. * new functions `remove_val_labels` and `remove_var_label()`. * bug fix in `to_character.labelled()` when applied to data frames. # labelled 0.2.2 * `to_factor()`, `to_character()` and `to_labelled.factor()` now preserves variable label. * bug fix in `to_factor()` when applied to data frames. # labelled 0.2.0 * Following evolution of `haven`, `labelled` doesn't support missing values anymore (cf. https://github.com/hadley/haven/commit/4b12ff9d51ddb9e7486966b85e0bcff44992904d) * New function `to_character()` (cf. https://github.com/larmarange/labelled/commit/3d32852587bb707d06627e56407eed1c9d5a49de) * `to_factor()` could now be applied to a data.frame (cf. https://github.com/larmarange/labelled/commit/ce1d750681fe0c9bcd767cb83a8d72ed4c5fc5fb) * If `data.table` is available, labelled attribute are now changed by _reference_ (cf. https://github.com/larmarange/labelled/commit/c8b163f706122844d798e6625779e8a65e5bbf41) * `zap_labels()` added as a synonym of `remove_labels()` labelled/inst/0000755000176200001440000000000014737431515013005 5ustar liggesuserslabelled/inst/doc/0000755000176200001440000000000014737431515013552 5ustar liggesuserslabelled/inst/doc/packed_columns.html0000644000176200001440000005540714737431514017441 0ustar liggesusers Variables labels and packed columns

Variables labels and packed columns

Joseph Larmarange

The tidyr package allows to group several columns of a tibble into one single df-column, see tidyr::pack(). Such df-column is itself a tibble. It’s not currently clear why you would ever want to pack columns since few functions work with this sort of data.

library(tidyr)
d <- iris %>%
  as_tibble() %>%
  pack(
    Sepal = starts_with("Sepal"),
    Petal = starts_with("Petal"),
    .names_sep = "."
  )
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "Length of petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "Width of Petal"
class(d$Sepal)
## [1] "tbl_df"     "tbl"        "data.frame"

Regarding variable labels, you may want to define a label for one sub-column of a df-column, or eventually a label for the df-column itself.

For a sub-column, you could use easily var_label() to define your label.

library(labelled)
var_label(d$Sepal$Length) <- "Length of the sepal"
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "Length of petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "Width of Petal"

But you cannot use directly var_label() for the df-column.

var_label(d$Petal) <- "wrong label for Petal"
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"

As d$Petal is itself a tibble, applying var_label() on it would have an effect on each sub-column. To change a variable label to the df-column itself, you could use label_attribute().

label_attribute(d$Petal) <- "correct label for Petal"
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..- attr(*, "label")= chr "correct label for Petal"

On the other hand, set_variable_labels() works differently, as the primary intention of this function is to work on the columns of a tibble.

d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column")
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##   ..- attr(*, "label")= chr "Label of the Sepal df-column"
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..- attr(*, "label")= chr "correct label for Petal"

This is equivalent to:

var_label(d) <- list(Sepal = "Label of the Sepal df-column")
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##   ..- attr(*, "label")= chr "Label of the Sepal df-column"
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..- attr(*, "label")= chr "correct label for Petal"

To use set_variable_labels() on sub-columns, you should use this syntax:

d$Petal <- d$Petal %>%
  set_variable_labels(
    Length = "Petal length",
    Width = "Petal width"
  )
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##   ..- attr(*, "label")= chr "Label of the Sepal df-column"
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "Petal length"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "Petal width"
##   ..- attr(*, "label")= chr "correct label for Petal"

If you want to get the list of variable labels of a tibble, by default var_label() or get_variable_labels() will return the labels of the first level of columns.

d %>% get_variable_labels()
## $Species
## NULL
## 
## $Sepal
## [1] "Label of the Sepal df-column"
## 
## $Petal
## [1] "correct label for Petal"

To obtain the list of variable labels for sub-columns, you could use recurse = TRUE:

d %>% get_variable_labels(recurse = TRUE)
## $Species
## NULL
## 
## $Sepal
## $Sepal$Length
## [1] "Length of the sepal"
## 
## $Sepal$Width
## NULL
## 
## 
## $Petal
## $Petal$Length
## [1] "Petal length"
## 
## $Petal$Width
## [1] "Petal width"
d %>%
  get_variable_labels(
    recurse = TRUE,
    null_action = "fill",
    unlist = TRUE
  )
##               Species          Sepal.Length           Sepal.Width 
##             "Species" "Length of the sepal"               "Width" 
##          Petal.Length           Petal.Width 
##        "Petal length"         "Petal width"
labelled/inst/doc/look_for.Rmd0000644000176200001440000001150414737244525016034 0ustar liggesusers--- author: "Joseph Larmarange" title: "Generate a data dictionary and search for variables with `look_for()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Generate a data dictionary and search for variables with `look_for()`} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ## Showing a summary of a data frame ### Default printing of tibbles It is a common need to easily get a description of all variables in a data frame. When a data frame is converted into a tibble (e.g. with `dplyr::as_tibble()`), it as a nice printing showing the first rows of the data frame as well as the type of column. ```{r message=FALSE} library(dplyr) ``` ```{r} iris %>% as_tibble() ``` However, when you have too many variables, all of them cannot be printed and their are just listed. ```{r} data(fertility, package = "questionr") women ``` Note: in **R** console, value labels (if defined) are usually printed but they do not appear in a R markdown document like this vignette. ### `dplyr::glimpse()` The function `dplyr::glimpse()` allows you to have a quick look at all the variables in a data frame. ```{r} glimpse(iris) glimpse(women) ``` It will show you the first values of each variable as well as the type of each variable. However, some important informations are not displayed: - variable labels, when defined; - value labels for labelled vectors; - the list of levels for factors; - the range of values for numerical variables. ### `labelled::look_for()` `look_for()` provided by the `labelled` package will print in the console a data dictionary of all variables, showing variable labels when available, the type of variable and a list of values corresponding to: - levels for factors; - value labels for labelled vectors; - the range of observed values in the vector otherwise (if `details = "full"`). ```{r} library(labelled) look_for(iris) look_for(women) ``` Note that `lookfor()` and `generate_dictionary()` are synonyms of `look_for()` and works exactly in the same way. If there is not enough space to print full labels in the console, they will be truncated (truncation is indicated by a `~`). ## Searching variables by key When a data frame has dozens or even hundreds of variables, it could become difficult to find a specific variable. In such case, you can provide an optional list of keywords, which can be simple character strings or regular expression, to search for specific variables. ```{r} # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") # Look_for will take variable labels into account look_for(women, "read", "level") ``` By default, `look_for()` will look through both variable names and variables labels. Use `labels = FALSE` to look only through variable names. ```{r} look_for(women, "read") look_for(women, "read", labels = FALSE) ``` Similarly, the search is by default case insensitive. To make the search case sensitive, use `ignore.case = FALSE`. ```{r} look_for(iris, "sepal") look_for(iris, "sepal", ignore.case = FALSE) ``` ## Level of details If you just want to use the search feature of `look_for()` without computing the details of each variable, simply indicate `details = "none"` or `details = FALSE`. ```{r} look_for(women, "id", details = "none") ``` If you want more details (but can be time consuming for big data frames), indicate `details = "full"` or `details = TRUE`. ```{r} look_for(women, details = "full") look_for(women, details = "full") %>% dplyr::glimpse() ``` ## Advanced usages of `look_for()` `look_for()` returns a detailed tibble which is summarized before printing. To deactivate default printing and see full results, simply use `dplyr::as_tibble()`, `dplyr::glimpse()` or even `utils::View()`. ```{r, eval=FALSE} look_for(women) %>% View() ``` ```{r} look_for(women) %>% as_tibble() glimpse(look_for(women)) ``` The tibble returned by `look_for()` could be easily manipulated for advanced programming. When a column has several values for one variable (e.g. `levels` or `value_labels`), results as stored with nested named list. You can convert named lists into simpler character vectors, you can use `convert_list_columns_to_character()`. ```{r} look_for(women) %>% convert_list_columns_to_character() ``` Alternatively, you can use `lookfor_to_long_format()` to transform results into a long format with one row per factor level and per value label. ```{r} look_for(women) %>% lookfor_to_long_format() ``` Both can be combined: ```{r} look_for(women) %>% lookfor_to_long_format() %>% convert_list_columns_to_character() ``` labelled/inst/doc/packed_columns.R0000644000176200001440000000305014737431514016661 0ustar liggesusers## ----------------------------------------------------------------------------- library(tidyr) d <- iris %>% as_tibble() %>% pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) str(d) class(d$Sepal) ## ----------------------------------------------------------------------------- library(labelled) var_label(d$Sepal$Length) <- "Length of the sepal" str(d) ## ----------------------------------------------------------------------------- var_label(d$Petal) <- "wrong label for Petal" str(d) ## ----------------------------------------------------------------------------- label_attribute(d$Petal) <- "correct label for Petal" str(d) ## ----------------------------------------------------------------------------- d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") str(d) ## ----------------------------------------------------------------------------- var_label(d) <- list(Sepal = "Label of the Sepal df-column") str(d) ## ----------------------------------------------------------------------------- d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) str(d) ## ----------------------------------------------------------------------------- d %>% get_variable_labels() ## ----------------------------------------------------------------------------- d %>% get_variable_labels(recurse = TRUE) d %>% get_variable_labels( recurse = TRUE, null_action = "fill", unlist = TRUE ) labelled/inst/doc/labelled.R0000644000176200001440000002122614737431503015441 0ustar liggesusers## ----------------------------------------------------------------------------- library(labelled) var_label(iris$Sepal.Length) <- "Length of sepal" ## ----------------------------------------------------------------------------- var_label(iris) <- list( Petal.Length = "Length of petal", Petal.Width = "Width of Petal" ) ## ----------------------------------------------------------------------------- var_label(iris$Petal.Width) var_label(iris) ## ----------------------------------------------------------------------------- var_label(iris$Sepal.Length) <- NULL ## ----eval=FALSE--------------------------------------------------------------- # View(iris) ## ----------------------------------------------------------------------------- look_for(iris) look_for(iris, "pet") look_for(iris, details = FALSE) ## ----------------------------------------------------------------------------- v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v ## ----------------------------------------------------------------------------- val_labels(v) val_label(v, 8) ## ----------------------------------------------------------------------------- val_labels(v) <- c(yes = 1, nno = 3, bug = 5) v val_label(v, 3) <- "no" v ## ----------------------------------------------------------------------------- val_label(v, 2) <- "maybe" val_label(v, 5) <- NULL v ## ----------------------------------------------------------------------------- val_labels(v) <- NULL v ## ----------------------------------------------------------------------------- val_label(v, 1) <- "yes" v ## ----error = TRUE------------------------------------------------------------- f <- factor(1:3) f val_labels(f) <- c(yes = 1, no = 3) ## ----------------------------------------------------------------------------- df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1) val_label(df, 1) <- "yes" val_label(df[, c("v1", "v3")], 2) <- "maybe" val_label(df[, c("v2", "v3")], 3) <- "no" val_labels(df) val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3) val_labels(df) val_labels(df) <- NULL val_labels(df) val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3)) val_labels(df) ## ----------------------------------------------------------------------------- v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA) val_label(v, 1) <- "yes" val_label(v, 3) <- "no" val_label(v, 9) <- "refused" val_label(v, 2) <- "maybe" val_label(v, 8) <- "don't know" v ## ----------------------------------------------------------------------------- sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) ## ----------------------------------------------------------------------------- sort_val_labels(v, according_to = "l") ## ----------------------------------------------------------------------------- v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ## ----------------------------------------------------------------------------- x <- c(1, 2, 2, 9) na_values(x) <- 9 x ## ----------------------------------------------------------------------------- v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- user_na_to_na(v) v2 ## ----------------------------------------------------------------------------- v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- remove_user_na(v) v2 ## ----------------------------------------------------------------------------- v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v na_values(v) <- NULL v ## ----------------------------------------------------------------------------- v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3)) v nolabel_to_na(v) ## ----------------------------------------------------------------------------- size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99)) size ## ----------------------------------------------------------------------------- val_labels_to_na(size) ## ----------------------------------------------------------------------------- v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v to_factor(v) ## ----------------------------------------------------------------------------- to_factor(v, levels = "v") to_factor(v, levels = "p") ## ----------------------------------------------------------------------------- to_factor(v, ordered = TRUE) ## ----------------------------------------------------------------------------- to_factor(v, nolabel_to_na = TRUE) to_factor(nolabel_to_na(v)) ## ----------------------------------------------------------------------------- to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "l") ## ----------------------------------------------------------------------------- f <- factor(1:3, labels = c("a", "b", "c")) to_labelled(f) ## ----------------------------------------------------------------------------- v to_labelled(to_factor(v)) ## ----------------------------------------------------------------------------- v to_character(v) ## ----------------------------------------------------------------------------- unclass(v) ## ----------------------------------------------------------------------------- remove_val_labels(v) ## ----------------------------------------------------------------------------- remove_val_labels(v) ## ----------------------------------------------------------------------------- x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" x remove_val_labels(x) remove_user_na(x) remove_user_na(x, user_na_to_na = TRUE) remove_val_labels(remove_user_na(x)) unclass(x) ## ----------------------------------------------------------------------------- remove_labels(x, user_na_to_na = TRUE) remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE) ## ----------------------------------------------------------------------------- df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)), d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), e = labelled_spss( c(1, 9, 1, 2), labels = c(No = 1, Yes = 2), na_values = 9 ) ) df %>% look_for() unlabelled(df) %>% look_for() unlabelled(df, user_na_to_na = TRUE) %>% look_for() unlabelled(df, drop_unused_labels = TRUE) %>% look_for() ## ----eval=FALSE--------------------------------------------------------------- # # from foreign # library(foreign) # df <- to_labelled(read.spss( # "file.sav", # to.data.frame = FALSE, # use.value.labels = FALSE, # use.missings = FALSE # )) # df <- to_labelled(read.dta( # "file.dta", # convert.factors = FALSE # )) # # # from memisc # library(memisc) # nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") # nes1948 <- spss.portable.file(nes1948.por) # df <- to_labelled(nes1948) # ds <- as.data.set(nes19480) # df <- to_labelled(ds) ## ----------------------------------------------------------------------------- library(dplyr) df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% set_variable_labels(s1 = "Sex", s2 = "Question") %>% set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2)) df$s2 ## ----------------------------------------------------------------------------- df <- df %>% set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9)) df$s2 df <- df %>% add_value_labels(s2 = c(No = 2)) df$s2 ## ----------------------------------------------------------------------------- df <- df %>% set_variable_labels(s1 = NULL) # removing one value label df <- df %>% remove_value_labels(s2 = 2) df$s2 # removing several value labels df <- df %>% remove_value_labels(s2 = 8:9) df$s2 # removing all value labels df <- df %>% set_value_labels(s2 = NULL) df$s2 ## ----------------------------------------------------------------------------- library(questionr) data(fertility) glimpse(women) glimpse(women %>% unlabelled()) ## ----------------------------------------------------------------------------- glimpse(to_factor(women)) glimpse(women %>% mutate(across(where(is.labelled), to_factor))) glimpse(women %>% mutate(across(employed:religion, to_factor))) labelled/inst/doc/missing_values.html0000644000176200001440000011114414737431513017470 0ustar liggesusers About missing values: regular NAs, tagged NAs and user NAs

About missing values: regular NAs, tagged NAs and user NAs

Joseph Larmarange

In base R, missing values are indicated using the specific value NA. Regular NAs could be used with any type of vector (double, integer, character, factor, Date, etc.).

Other statistical software have implemented ways to differentiate several types of missing values.

Stata and SAS have a system of tagged NAs, where NA values are tagged with a letter (from a to z). SPSS allows users to indicate that certain non-missing values should be treated in some analysis as missing (user NAs). The haven package implements tagged NAs and user NAs in order to keep this information when importing files from Stata, SAS or SPSS.

library(labelled)

Tagged NAs

Creation and tests

Tagged NAs are proper NA values with a tag attached to them. They can be created with tagged_na(). The attached tag should be a single letter, lowercase (a-z) or uppercase (A-Z).

x <- c(1:5, tagged_na("a"), tagged_na("z"), NA)

For most R functions, tagged NAs are just considered as regular NAs. By default, they are just printed as any other regular NA.

x
## [1]  1  2  3  4  5 NA NA NA
is.na(x)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE

To show/print their tags, you need to use na_tag(), print_tagged_na() or format_tagged_na().

na_tag(x)
## [1] NA  NA  NA  NA  NA  "a" "z" NA
print_tagged_na(x)
## [1]     1     2     3     4     5 NA(a) NA(z)    NA
format_tagged_na(x)
## [1] "    1" "    2" "    3" "    4" "    5" "NA(a)" "NA(z)" "   NA"

To test if a certain NA is a regular NA or a tagged NA, you should use is_regular_na() or is_tagged_na().

is.na(x)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE
is_tagged_na(x)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
# You can test for specific tagged NAs with the second argument
is_tagged_na(x, "a")
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
is_regular_na(x)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

Tagged NAs could be defined only for double vectors. If you add a tagged NA to a character vector, it will be converted into a regular NA. If you add a tagged NA to an integer vector, the vector will be converted into a double vector.

y <- c("a", "b", tagged_na("z"))
y
## [1] "a" "b" NA
is_tagged_na(y)
## [1] FALSE FALSE FALSE
format_tagged_na(y)
## Error: `x` must be a double vector
z <- c(1L, 2L, tagged_na("a"))
typeof(z)
## [1] "double"
format_tagged_na(z)
## [1] "    1" "    2" "NA(a)"

Unique values, duplicates and sorting with tagged NAs

By default, functions such as base::unique(), base::duplicated(), base::order() or base::sort() will treat tagged NAs as the same thing as a regular NA. You can use unique_tagged_na(), duplicated_tagged_na(), order_tagged_na() and sort_tagged_na() as alternatives that will treat two tagged NAs with different tags as separate values.

x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA)
x %>% print_tagged_na()
## [1]     1     2 NA(a)     1 NA(z)     2 NA(a)    NA
unique(x) %>% print_tagged_na()
## [1]     1     2 NA(a)
unique_tagged_na(x) %>% print_tagged_na()
## [1]     1     2 NA(a) NA(z)    NA
duplicated(x)
## [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
duplicated_tagged_na(x)
## [1] FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE
sort(x, na.last = TRUE) %>% print_tagged_na()
## [1]     1     1     2     2 NA(a) NA(z) NA(a)    NA
sort_tagged_na(x) %>% print_tagged_na()
## [1]     1     1     2     2 NA(a) NA(a) NA(z)    NA

Tagged NAs and value labels

It is possible to define value labels for tagged NAs.

x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA)
val_labels(x) <- c(
  no = 0, yes = 1,
  "don't know" = tagged_na("d"),
  refusal = tagged_na("r")
)
x
## <labelled<double>[8]>
## [1]     1     0     1 NA(r)     0 NA(d) NA(z)    NA
## 
## Labels:
##  value      label
##      0         no
##      1        yes
##  NA(d) don't know
##  NA(r)    refusal

When converting such labelled vector into factor, tagged NAs are, by default, converted into regular NAs (it is not possible to define tagged NAs with factors).

to_factor(x)
## [1] yes  no   yes  <NA> no   <NA> <NA> <NA>
## Levels: no yes

However, the option explicit_tagged_na of to_factor() allows to transform tagged NAs into explicit factor levels.

to_factor(x, explicit_tagged_na = TRUE)
## [1] yes        no         yes        refusal    no         don't know NA(z)     
## [8] <NA>      
## Levels: no yes don't know refusal NA(z)
to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE)
## [1] [1] yes            [0] no             [1] yes            [NA(r)] refusal   
## [5] [0] no             [NA(d)] don't know [NA(z)] NA(z)      <NA>              
## Levels: [0] no [1] yes [NA(d)] don't know [NA(r)] refusal [NA(z)] NA(z)

Conversion into user NAs

Tagged NAs can be converted into user NAs with tagged_na_to_user_na().

tagged_na_to_user_na(x)
## <labelled_spss<double>[8]>
## [1]  1  0  1  3  0  2  4 NA
## Missing range:  [2, 4]
## 
## Labels:
##  value      label
##      0         no
##      1        yes
##      2 don't know
##      3    refusal
##      4      NA(z)
tagged_na_to_user_na(x, user_na_start = 10)
## <labelled_spss<double>[8]>
## [1]  1  0  1 11  0 10 12 NA
## Missing range:  [10, 12]
## 
## Labels:
##  value      label
##      0         no
##      1        yes
##     10 don't know
##     11    refusal
##     12      NA(z)

Use tagged_na_to_regular_na() to convert tagged NAs into regular NAs.

tagged_na_to_regular_na(x)
## <labelled<double>[8]>
## [1]  1  0  1 NA  0 NA NA NA
## 
## Labels:
##  value label
##      0    no
##      1   yes
tagged_na_to_regular_na(x) %>% is_tagged_na()
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

User NAs

haven introduced an haven_labelled_spss class to deal with user defined missing values in a similar way as SPSS. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal NA values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into NA if required before analysis. These defined missing values could co-exist with internal NA values.

Creation

User NAs could be created directly with labelled_spss(). You can also manipulate them with na_values() and na_range().

v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9))
v
## <labelled<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_values(v) <- 9
v
## <labelled_spss<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## Missing values: 9
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_values(v) <- NULL
v
## <labelled<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_range(v) <- c(5, Inf)
na_range(v)
## [1]   5 Inf
v
## <labelled_spss<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## Missing range:  [5, Inf]
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know

NB: you cant also use set_na_range() and set_na_values() for a dplyr-like syntax.

library(dplyr)
# setting value labels and user NAs
df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>%
  set_value_labels(s2 = c(yes = 1, no = 2)) %>%
  set_na_values(s2 = 9)
df$s2
## <labelled_spss<double>[4]>
## [1] 1 1 2 9
## Missing values: 9
## 
## Labels:
##  value label
##      1   yes
##      2    no
# removing user NAs
df <- df %>% set_na_values(s2 = NULL)
df$s2
## <labelled<double>[4]>
## [1] 1 1 2 9
## 
## Labels:
##  value label
##      1   yes
##      2    no

Tests

Note that is.na() will return TRUE for user NAs. Use is_user_na() to test if a specific value is a user NA and is_regular_na() to test if it is a regular NA.

v
## <labelled_spss<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## Missing range:  [5, Inf]
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
is.na(v)
## [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
is_user_na(v)
## [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
is_regular_na(v)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

Conversion

For most R functions, user NAs values are still regular values.

x <- c(1:5, 11:15)
na_range(x) <- c(10, Inf)
val_labels(x) <- c("dk" = 11, "refused" = 15)
x
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5 11 12 13 14 15
## Missing range:  [10, Inf]
## 
## Labels:
##  value   label
##     11      dk
##     15 refused
mean(x)
## [1] 8

You can convert user NAs into regular NAs with user_na_to_na() or user_na_to_regular_na() (both functions are identical).

user_na_to_na(x)
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5 NA NA NA NA NA
mean(user_na_to_na(x), na.rm = TRUE)
## [1] 3

Alternatively, if the vector is numeric, you can convert user NAs into tagged NAs with user_na_to_tagged_na().

user_na_to_tagged_na(x)
## ℹ `x` has been converted into a double vector.
## <labelled<double>[10]>
##  [1]     1     2     3     4     5 NA(a) NA(b) NA(c) NA(d) NA(e)
## 
## Labels:
##  value   label
##  NA(a)      dk
##  NA(e) refused
mean(user_na_to_tagged_na(x), na.rm = TRUE)
## ℹ `x` has been converted into a double vector.
## [1] 3

Finally, you can also remove user NAs definition without converting these values to NA, using remove_user_na().

remove_user_na(x)
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5 11 12 13 14 15
## 
## Labels:
##  value   label
##     11      dk
##     15 refused
mean(remove_user_na(x))
## [1] 8
labelled/inst/doc/packed_columns.Rmd0000644000176200001440000000465014466735327017221 0ustar liggesusers--- author: "Joseph Larmarange" title: "Variables labels and packed columns" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Variables labels and packed columns} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The **tidyr** package allows to group several columns of a tibble into one single df-column, see `tidyr::pack()`. Such df-column is itself a tibble. It's not currently clear why you would ever want to pack columns since few functions work with this sort of data. ```{r} library(tidyr) d <- iris %>% as_tibble() %>% pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) str(d) class(d$Sepal) ``` Regarding variable labels, you may want to define a label for one sub-column of a df-column, or eventually a label for the df-column itself. For a sub-column, you could use easily `var_label()` to define your label. ```{r} library(labelled) var_label(d$Sepal$Length) <- "Length of the sepal" str(d) ``` But you cannot use directly `var_label()` for the df-column. ```{r} var_label(d$Petal) <- "wrong label for Petal" str(d) ``` As `d$Petal` is itself a tibble, applying `var_label()` on it would have an effect on each sub-column. To change a variable label to the df-column itself, you could use `label_attribute()`. ```{r} label_attribute(d$Petal) <- "correct label for Petal" str(d) ``` On the other hand, `set_variable_labels()` works differently, as the primary intention of this function is to work on the columns of a tibble. ```{r} d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") str(d) ``` This is equivalent to: ```{r} var_label(d) <- list(Sepal = "Label of the Sepal df-column") str(d) ``` To use `set_variable_labels()` on sub-columns, you should use this syntax: ```{r} d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) str(d) ``` If you want to get the list of variable labels of a tibble, by default `var_label()` or `get_variable_labels()` will return the labels of the first level of columns. ```{r} d %>% get_variable_labels() ``` To obtain the list of variable labels for sub-columns, you could use `recurse = TRUE`: ```{r} d %>% get_variable_labels(recurse = TRUE) d %>% get_variable_labels( recurse = TRUE, null_action = "fill", unlist = TRUE ) ``` labelled/inst/doc/look_for.html0000644000176200001440000013307014737431511016252 0ustar liggesusers Generate a data dictionary and search for variables with look_for()

Generate a data dictionary and search for variables with look_for()

Joseph Larmarange

Showing a summary of a data frame

Default printing of tibbles

It is a common need to easily get a description of all variables in a data frame.

When a data frame is converted into a tibble (e.g. with dplyr::as_tibble()), it as a nice printing showing the first rows of the data frame as well as the type of column.

library(dplyr)
iris %>% as_tibble()
## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows

However, when you have too many variables, all of them cannot be printed and their are just listed.

data(fertility, package = "questionr")
women
## # A tibble: 2,000 × 17
##    id_woman id_household weight interview_date date_of_birth   age residency
##       <dbl>        <dbl>  <dbl> <date>         <date>        <dbl> <dbl+lbl>
##  1      391          381  1.80  2012-05-05     1997-03-07       15 2 [rural]
##  2     1643         1515  1.80  2012-01-23     1982-01-06       30 2 [rural]
##  3       85           85  1.80  2012-01-21     1979-01-01       33 2 [rural]
##  4      881          844  1.80  2012-01-06     1968-03-29       43 2 [rural]
##  5     1981         1797  1.80  2012-05-11     1986-05-25       25 2 [rural]
##  6     1072         1015  0.998 2012-02-20     1993-07-03       18 2 [rural]
##  7     1978         1794  0.998 2012-02-23     1967-01-28       45 2 [rural]
##  8     1607         1486  0.998 2012-02-20     1989-01-21       23 2 [rural]
##  9      738          711  0.192 2012-03-09     1962-07-24       49 2 [rural]
## 10     1656         1525  0.192 2012-03-15     1980-12-25       31 2 [rural]
## # ℹ 1,990 more rows
## # ℹ 10 more variables: region <dbl+lbl>, instruction <dbl+lbl>,
## #   employed <dbl+lbl>, matri <dbl+lbl>, religion <dbl+lbl>,
## #   newspaper <dbl+lbl>, radio <dbl+lbl>, tv <dbl+lbl>,
## #   ideal_nb_children <dbl+lbl>, test <dbl+lbl>

Note: in R console, value labels (if defined) are usually printed but they do not appear in a R markdown document like this vignette.

dplyr::glimpse()

The function dplyr::glimpse() allows you to have a quick look at all the variables in a data frame.

glimpse(iris)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
glimpse(women)
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ region            <dbl+lbl> 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, …
## $ instruction       <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, …
## $ employed          <dbl+lbl> 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ matri             <dbl+lbl> 0, 2, 2, 2, 1, 0, 1, 1, 2, 5, 2, 3, 0, 2, 1, 2, …
## $ religion          <dbl+lbl> 1, 3, 2, 3, 2, 2, 3, 1, 3, 3, 2, 3, 2, 2, 2, 2, …
## $ newspaper         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ radio             <dbl+lbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, …
## $ tv                <dbl+lbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ ideal_nb_children <dbl+lbl>  4,  4,  4,  4,  4,  5, 10,  5,  4,  5,  6, 10, …
## $ test              <dbl+lbl> 0, 9, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, …

It will show you the first values of each variable as well as the type of each variable. However, some important informations are not displayed:

  • variable labels, when defined;
  • value labels for labelled vectors;
  • the list of levels for factors;
  • the range of values for numerical variables.

labelled::look_for()

look_for() provided by the labelled package will print in the console a data dictionary of all variables, showing variable labels when available, the type of variable and a list of values corresponding to:

  • levels for factors;
  • value labels for labelled vectors;
  • the range of observed values in the vector otherwise (if details = "full").
library(labelled)
look_for(iris)
##  pos variable     label           col_type missing values    
##  1   Sepal.Length —               dbl      0                 
##  2   Sepal.Width  —               dbl      0                 
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
look_for(women)
##  pos variable          label              col_type missing values            
##  1   id_woman          Woman Id           dbl      0                         
##  2   id_household      Household Id       dbl      0                         
##  3   weight            Sample weight      dbl      0                         
##  4   interview_date    Interview date     date     0                         
##  5   date_of_birth     Date of birth      date     0                         
##  6   age               Age at last anniv~ dbl      0                         
##  7   residency         Urban / rural res~ dbl+lbl  0       [1] urban         
##                                                            [2] rural         
##  8   region            Region             dbl+lbl  0       [1] North         
##                                                            [2] East          
##                                                            [3] South         
##                                                            [4] West          
##  9   instruction       Level of instruct~ dbl+lbl  0       [0] none          
##                                                            [1] primary       
##                                                            [2] secondary     
##                                                            [3] higher        
##  10  employed          Employed?          dbl+lbl  7       [0] no            
##                                                            [1] yes           
##                                                            [9] missing       
##  11  matri             Matrimonial status dbl+lbl  0       [0] single        
##                                                            [1] married       
##                                                            [2] living togeth~
##                                                            [3] windowed      
##                                                            [4] divorced      
##                                                            [5] separated     
##  12  religion          Religion           dbl+lbl  4       [1] Muslim        
##                                                            [2] Christian     
##                                                            [3] Protestant    
##                                                            [4] no religion   
##                                                            [5] other         
##  13  newspaper         Read newspaper?    dbl+lbl  0       [0] no            
##                                                            [1] yes           
##  14  radio             Listen to radio?   dbl+lbl  0       [0] no            
##                                                            [1] yes           
##  15  tv                Watch TV?          dbl+lbl  0       [0] no            
##                                                            [1] yes           
##  16  ideal_nb_children Ideal number of c~ dbl+lbl  0       [96] don't know   
##                                                            [99] missing      
##  17  test              Ever tested for H~ dbl+lbl  29      [0] no            
##                                                            [1] yes           
##                                                            [9] missing

Note that lookfor() and generate_dictionary() are synonyms of look_for() and works exactly in the same way.

If there is not enough space to print full labels in the console, they will be truncated (truncation is indicated by a ~).

Searching variables by key

When a data frame has dozens or even hundreds of variables, it could become difficult to find a specific variable. In such case, you can provide an optional list of keywords, which can be simple character strings or regular expression, to search for specific variables.

# Look for a single keyword.
look_for(iris, "petal")
##  pos variable     label           col_type missing values
##  3   Petal.Length Length of petal dbl      0             
##  4   Petal.Width  Width of Petal  dbl      0
look_for(iris, "s")
##  pos variable     label col_type missing values    
##  1   Sepal.Length —     dbl      0                 
##  2   Sepal.Width  —     dbl      0                 
##  5   Species      —     fct      0       setosa    
##                                          versicolor
##                                          virginica
# Look for with a regular expression
look_for(iris, "petal|species")
##  pos variable     label           col_type missing values    
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
look_for(iris, "s$")
##  pos variable label col_type missing values    
##  5   Species  —     fct      0       setosa    
##                                      versicolor
##                                      virginica
# Look for with several keywords
look_for(iris, "pet", "sp")
##  pos variable     label           col_type missing values    
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
# Look_for will take variable labels into account
look_for(women, "read", "level")
##  pos variable    label                col_type missing values       
##  9   instruction Level of instruction dbl+lbl  0       [0] none     
##                                                        [1] primary  
##                                                        [2] secondary
##                                                        [3] higher   
##  13  newspaper   Read newspaper?      dbl+lbl  0       [0] no       
##                                                        [1] yes

By default, look_for() will look through both variable names and variables labels. Use labels = FALSE to look only through variable names.

look_for(women, "read")
##  pos variable  label           col_type missing values 
##  13  newspaper Read newspaper? dbl+lbl  0       [0] no 
##                                                 [1] yes
look_for(women, "read", labels = FALSE)
## ! Nothing found. Sorry.

Similarly, the search is by default case insensitive. To make the search case sensitive, use ignore.case = FALSE.

look_for(iris, "sepal")
##  pos variable     label col_type missing values
##  1   Sepal.Length —     dbl      0             
##  2   Sepal.Width  —     dbl      0
look_for(iris, "sepal", ignore.case = FALSE)
## ! Nothing found. Sorry.

Level of details

If you just want to use the search feature of look_for() without computing the details of each variable, simply indicate details = "none" or details = FALSE.

look_for(women, "id", details = "none")
##  pos variable          label                   
##   1  id_woman          Woman Id                
##   2  id_household      Household Id            
##   7  residency         Urban / rural residency 
##  16  ideal_nb_children Ideal number of children

If you want more details (but can be time consuming for big data frames), indicate details = "full" or details = TRUE.

look_for(women, details = "full")
##  pos variable          label              col_type missing unique_values
##  1   id_woman          Woman Id           dbl      0       2000         
##  2   id_household      Household Id       dbl      0       1814         
##  3   weight            Sample weight      dbl      0       351          
##  4   interview_date    Interview date     date     0       165          
##  5   date_of_birth     Date of birth      date     0       1740         
##  6   age               Age at last anniv~ dbl      0       36           
##  7   residency         Urban / rural res~ dbl+lbl  0       2            
##                                                                         
##  8   region            Region             dbl+lbl  0       4            
##                                                                         
##                                                                         
##                                                                         
##  9   instruction       Level of instruct~ dbl+lbl  0       4            
##                                                                         
##                                                                         
##                                                                         
##  10  employed          Employed?          dbl+lbl  7       3            
##                                                                         
##                                                                         
##  11  matri             Matrimonial status dbl+lbl  0       6            
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##  12  religion          Religion           dbl+lbl  4       6            
##                                                                         
##                                                                         
##                                                                         
##                                                                         
##  13  newspaper         Read newspaper?    dbl+lbl  0       2            
##                                                                         
##  14  radio             Listen to radio?   dbl+lbl  0       2            
##                                                                         
##  15  tv                Watch TV?          dbl+lbl  0       2            
##                                                                         
##  16  ideal_nb_children Ideal number of c~ dbl+lbl  0       18           
##                                                                         
##  17  test              Ever tested for H~ dbl+lbl  29      3            
##                                                                         
##                                                                         
##  values             na_values na_range
##  range: 1 - 2000                      
##  range: 1 - 1814                      
##  range: 0.044629 -~                   
##  range: 2011-12-01~                   
##  range: 1962-02-07~                   
##  range: 14 - 49                       
##  [1] urban                            
##  [2] rural                            
##  [1] North                            
##  [2] East                             
##  [3] South                            
##  [4] West                             
##  [0] none                             
##  [1] primary                          
##  [2] secondary                        
##  [3] higher                           
##  [0] no             9                 
##  [1] yes                              
##  [9] missing                          
##  [0] single                           
##  [1] married                          
##  [2] living togeth~                   
##  [3] windowed                         
##  [4] divorced                         
##  [5] separated                        
##  [1] Muslim                           
##  [2] Christian                        
##  [3] Protestant                       
##  [4] no religion                      
##  [5] other                            
##  [0] no                               
##  [1] yes                              
##  [0] no                               
##  [1] yes                              
##  [0] no                               
##  [1] yes                              
##  [96] don't know                      
##  [99] missing                         
##  [0] no             9                 
##  [1] yes                              
##  [9] missing
look_for(women, details = "full") %>%
  dplyr::glimpse()
## Rows: 17
## Columns: 14
## $ pos           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
## $ variable      <chr> "id_woman", "id_household", "weight", "interview_date", …
## $ label         <chr> "Woman Id", "Household Id", "Sample weight", "Interview …
## $ col_type      <chr> "dbl", "dbl", "dbl", "date", "date", "dbl", "dbl+lbl", "…
## $ missing       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 4, 0, 0, 0, 0, 29
## $ levels        <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, …
## $ value_labels  <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <1, 2>, …
## $ class         <named list> "numeric", "numeric", "numeric", "Date", "Date", …
## $ type          <chr> "double", "double", "double", "double", "double",…
## $ na_values     <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <…
## $ na_range      <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, …
## $ n_na          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 4, 0, 0, 0, 0, 29
## $ unique_values <int> 2000, 1814, 351, 165, 1740, 36, 2, 4, 4, 3, 6, 6,…
## $ range         <named list> <1, 2000>, <1, 1814>, <0.044629, 4.396831>, <2011…

Advanced usages of look_for()

look_for() returns a detailed tibble which is summarized before printing. To deactivate default printing and see full results, simply use dplyr::as_tibble(), dplyr::glimpse() or even utils::View().

look_for(women) %>% View()
look_for(women) %>% as_tibble()
## # A tibble: 17 × 7
##      pos variable          label            col_type missing levels value_labels
##    <int> <chr>             <chr>            <chr>      <int> <name> <named list>
##  1     1 id_woman          Woman Id         dbl            0 <NULL> <NULL>      
##  2     2 id_household      Household Id     dbl            0 <NULL> <NULL>      
##  3     3 weight            Sample weight    dbl            0 <NULL> <NULL>      
##  4     4 interview_date    Interview date   date           0 <NULL> <NULL>      
##  5     5 date_of_birth     Date of birth    date           0 <NULL> <NULL>      
##  6     6 age               Age at last ann… dbl            0 <NULL> <NULL>      
##  7     7 residency         Urban / rural r… dbl+lbl        0 <NULL> <dbl [2]>   
##  8     8 region            Region           dbl+lbl        0 <NULL> <dbl [4]>   
##  9     9 instruction       Level of instru… dbl+lbl        0 <NULL> <dbl [4]>   
## 10    10 employed          Employed?        dbl+lbl        7 <NULL> <dbl [3]>   
## 11    11 matri             Matrimonial sta… dbl+lbl        0 <NULL> <dbl [6]>   
## 12    12 religion          Religion         dbl+lbl        4 <NULL> <dbl [5]>   
## 13    13 newspaper         Read newspaper?  dbl+lbl        0 <NULL> <dbl [2]>   
## 14    14 radio             Listen to radio? dbl+lbl        0 <NULL> <dbl [2]>   
## 15    15 tv                Watch TV?        dbl+lbl        0 <NULL> <dbl [2]>   
## 16    16 ideal_nb_children Ideal number of… dbl+lbl        0 <NULL> <dbl [2]>   
## 17    17 test              Ever tested for… dbl+lbl       29 <NULL> <dbl [3]>
glimpse(look_for(women))
## Rows: 17
## Columns: 7
## $ pos          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
## $ variable     <chr> "id_woman", "id_household", "weight", "interview_date", "…
## $ label        <chr> "Woman Id", "Household Id", "Sample weight", "Interview d…
## $ col_type     <chr> "dbl", "dbl", "dbl", "date", "date", "dbl", "dbl+lbl", "d…
## $ missing      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 4, 0, 0, 0, 0, 29
## $ levels       <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <…
## $ value_labels <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <1, 2>, <…

The tibble returned by look_for() could be easily manipulated for advanced programming.

When a column has several values for one variable (e.g. levels or value_labels), results as stored with nested named list. You can convert named lists into simpler character vectors, you can use convert_list_columns_to_character().

look_for(women) %>% convert_list_columns_to_character()
## # A tibble: 17 × 7
##      pos variable          label            col_type missing levels value_labels
##    <int> <chr>             <chr>            <chr>      <int> <chr>  <chr>       
##  1     1 id_woman          Woman Id         dbl            0 ""     ""          
##  2     2 id_household      Household Id     dbl            0 ""     ""          
##  3     3 weight            Sample weight    dbl            0 ""     ""          
##  4     4 interview_date    Interview date   date           0 ""     ""          
##  5     5 date_of_birth     Date of birth    date           0 ""     ""          
##  6     6 age               Age at last ann… dbl            0 ""     ""          
##  7     7 residency         Urban / rural r… dbl+lbl        0 ""     "[1] urban;…
##  8     8 region            Region           dbl+lbl        0 ""     "[1] North;…
##  9     9 instruction       Level of instru… dbl+lbl        0 ""     "[0] none; …
## 10    10 employed          Employed?        dbl+lbl        7 ""     "[0] no; [1…
## 11    11 matri             Matrimonial sta… dbl+lbl        0 ""     "[0] single…
## 12    12 religion          Religion         dbl+lbl        4 ""     "[1] Muslim…
## 13    13 newspaper         Read newspaper?  dbl+lbl        0 ""     "[0] no; [1…
## 14    14 radio             Listen to radio? dbl+lbl        0 ""     "[0] no; [1…
## 15    15 tv                Watch TV?        dbl+lbl        0 ""     "[0] no; [1…
## 16    16 ideal_nb_children Ideal number of… dbl+lbl        0 ""     "[96] don't…
## 17    17 test              Ever tested for… dbl+lbl       29 ""     "[0] no; [1…

Alternatively, you can use lookfor_to_long_format() to transform results into a long format with one row per factor level and per value label.

look_for(women) %>% lookfor_to_long_format()
## # A tibble: 41 × 7
##      pos variable       label               col_type missing levels value_labels
##    <int> <chr>          <chr>               <chr>      <int> <chr>  <chr>       
##  1     1 id_woman       Woman Id            dbl            0 <NA>   <NA>        
##  2     2 id_household   Household Id        dbl            0 <NA>   <NA>        
##  3     3 weight         Sample weight       dbl            0 <NA>   <NA>        
##  4     4 interview_date Interview date      date           0 <NA>   <NA>        
##  5     5 date_of_birth  Date of birth       date           0 <NA>   <NA>        
##  6     6 age            Age at last annive… dbl            0 <NA>   <NA>        
##  7     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [1] urban   
##  8     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [2] rural   
##  9     8 region         Region              dbl+lbl        0 <NA>   [1] North   
## 10     8 region         Region              dbl+lbl        0 <NA>   [2] East    
## # ℹ 31 more rows

Both can be combined:

look_for(women) %>%
  lookfor_to_long_format() %>%
  convert_list_columns_to_character()
## # A tibble: 41 × 7
##      pos variable       label               col_type missing levels value_labels
##    <int> <chr>          <chr>               <chr>      <int> <chr>  <chr>       
##  1     1 id_woman       Woman Id            dbl            0 <NA>   <NA>        
##  2     2 id_household   Household Id        dbl            0 <NA>   <NA>        
##  3     3 weight         Sample weight       dbl            0 <NA>   <NA>        
##  4     4 interview_date Interview date      date           0 <NA>   <NA>        
##  5     5 date_of_birth  Date of birth       date           0 <NA>   <NA>        
##  6     6 age            Age at last annive… dbl            0 <NA>   <NA>        
##  7     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [1] urban   
##  8     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [2] rural   
##  9     8 region         Region              dbl+lbl        0 <NA>   [1] North   
## 10     8 region         Region              dbl+lbl        0 <NA>   [2] East    
## # ℹ 31 more rows
labelled/inst/doc/missing_values.Rmd0000644000176200001440000001445514357761455017266 0ustar liggesusers--- author: "Joseph Larmarange" title: "About missing values: regular NAs, tagged NAs and user NAs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{About missing values: regular NAs, tagged NAs and user NAs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- In base **R**, missing values are indicated using the specific value `NA`. **Regular NAs** could be used with any type of vector (double, integer, character, factor, Date, etc.). Other statistical software have implemented ways to differentiate several types of missing values. **Stata** and **SAS** have a system of **tagged NAs**, where NA values are tagged with a letter (from a to z). **SPSS** allows users to indicate that certain non-missing values should be treated in some analysis as missing (**user NAs**). The `haven` package implements **tagged NAs** and **user NAs** in order to keep this information when importing files from **Stata**, **SAS** or **SPSS**. ```{r} library(labelled) ``` ## Tagged NAs ### Creation and tests **Tagged NAs** are proper `NA` values with a tag attached to them. They can be created with `tagged_na()`. The attached tag should be a single letter, lowercase (a-z) or uppercase (A-Z). ```{r} x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) ``` For most **R** functions, tagged NAs are just considered as regular NAs. By default, they are just printed as any other regular NA. ```{r} x is.na(x) ``` To show/print their tags, you need to use `na_tag()`, `print_tagged_na()` or `format_tagged_na()`. ```{r} na_tag(x) print_tagged_na(x) format_tagged_na(x) ``` To test if a certain NA is a regular NA or a tagged NA, you should use `is_regular_na()` or `is_tagged_na()`. ```{r} is.na(x) is_tagged_na(x) # You can test for specific tagged NAs with the second argument is_tagged_na(x, "a") is_regular_na(x) ``` Tagged NAs could be defined **only** for double vectors. If you add a tagged NA to a character vector, it will be converted into a regular NA. If you add a tagged NA to an integer vector, the vector will be converted into a double vector. ```{r, error=TRUE} y <- c("a", "b", tagged_na("z")) y is_tagged_na(y) format_tagged_na(y) z <- c(1L, 2L, tagged_na("a")) typeof(z) format_tagged_na(z) ``` ### Unique values, duplicates and sorting with tagged NAs By default, functions such as `base::unique()`, `base::duplicated()`, `base::order()` or `base::sort()` will treat tagged NAs as the same thing as a regular NA. You can use `unique_tagged_na()`, `duplicated_tagged_na()`, `order_tagged_na()` and `sort_tagged_na()` as alternatives that will treat two tagged NAs with different tags as separate values. ```{r} x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x %>% print_tagged_na() unique(x) %>% print_tagged_na() unique_tagged_na(x) %>% print_tagged_na() duplicated(x) duplicated_tagged_na(x) sort(x, na.last = TRUE) %>% print_tagged_na() sort_tagged_na(x) %>% print_tagged_na() ``` ### Tagged NAs and value labels It is possible to define value labels for tagged NAs. ```{r} x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) x ``` When converting such labelled vector into factor, tagged NAs are, by default, converted into regular NAs (it is not possible to define tagged NAs with factors). ```{r} to_factor(x) ``` However, the option `explicit_tagged_na` of `to_factor()` allows to transform tagged NAs into explicit factor levels. ```{r} to_factor(x, explicit_tagged_na = TRUE) to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE) ``` ### Conversion into user NAs Tagged NAs can be converted into user NAs with `tagged_na_to_user_na()`. ```{r} tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) ``` Use `tagged_na_to_regular_na()` to convert tagged NAs into regular NAs. ```{r} tagged_na_to_regular_na(x) tagged_na_to_regular_na(x) %>% is_tagged_na() ``` ## User NAs `haven` introduced an `haven_labelled_spss` class to deal with user defined missing values in a similar way as **SPSS**. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. ### Creation User NAs could be created directly with `labelled_spss()`. You can also manipulate them with `na_values()` and `na_range()`. ```{r} v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 v na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` NB: you cant also use `set_na_range()` and `set_na_values()` for a `dplyr`-like syntax. ```{r} library(dplyr) # setting value labels and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% set_value_labels(s2 = c(yes = 1, no = 2)) %>% set_na_values(s2 = 9) df$s2 # removing user NAs df <- df %>% set_na_values(s2 = NULL) df$s2 ``` ### Tests Note that `is.na()` will return `TRUE` for user NAs. Use `is_user_na()` to test if a specific value is a user NA and `is_regular_na()` to test if it is a regular NA. ```{r} v is.na(v) is_user_na(v) is_regular_na(v) ``` ### Conversion For most **R** functions, user NAs values are **still** regular values. ```{r} x <- c(1:5, 11:15) na_range(x) <- c(10, Inf) val_labels(x) <- c("dk" = 11, "refused" = 15) x mean(x) ``` You can convert user NAs into regular NAs with `user_na_to_na()` or `user_na_to_regular_na()` (both functions are identical). ```{r} user_na_to_na(x) mean(user_na_to_na(x), na.rm = TRUE) ``` Alternatively, if the vector is numeric, you can convert user NAs into tagged NAs with `user_na_to_tagged_na()`. ```{r} user_na_to_tagged_na(x) mean(user_na_to_tagged_na(x), na.rm = TRUE) ``` Finally, you can also remove user NAs definition without converting these values to `NA`, using `remove_user_na()`. ```{r} remove_user_na(x) mean(remove_user_na(x)) ``` labelled/inst/doc/labelled.Rmd0000644000176200001440000003660114737244525015773 0ustar liggesusers--- author: "Joseph Larmarange" title: "Introduction to labelled" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Introduction to labelled} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The purpose of the **labelled** package is to provide functions to manipulate metadata as variable labels, value labels and defined missing values using the `haven_labelled` and `haven_labelled_spss` classes introduced in `haven` package. These classes allow to add metadata (variable, value labels and SPSS-style missing values) to vectors. It should be noted that **value labels** doesn't imply that your vectors should be considered as categorical or continuous. Therefore, value labels are not intended to be use for data analysis. For example, before performing modeling, you should convert vectors with value labels into factors or into classic numeric/character vectors. Therefore, two main approaches could be considered. ![Two main approaches](approaches.png){width=100%} In **approach A**, `haven_labelled` vectors are converted into factors or into numeric/character vectors just after data import, using `unlabelled()`, `to_factor()` or `unclass()`. Then, data cleaning, recoding and analysis are performed using classic **R** vector types. In **approach B**, `haven_labelled` vectors are kept for data cleaning and coding, allowing to preserved original recoding, in particular if data should be reexported after that step. Functions provided by `labelled` will be useful for managing value labels. However, as in approach A, `haven_labelled` vectors will have to be converted into classic factors or numeric vectors before data analysis (in particular modeling) as this is the way categorical and continuous variables should be coded for analysis functions. ## Variable labels A variable label could be specified for any vector using `var_label()`. ```{r} library(labelled) var_label(iris$Sepal.Length) <- "Length of sepal" ``` It's possible to add a variable label to several columns of a data frame using a named list. ```{r} var_label(iris) <- list( Petal.Length = "Length of petal", Petal.Width = "Width of Petal" ) ``` To get the variable label, simply call `var_label()`. ```{r} var_label(iris$Petal.Width) var_label(iris) ``` To remove a variable label, use `NULL`. ```{r} var_label(iris$Sepal.Length) <- NULL ``` In **RStudio**, variable labels will be displayed in data viewer. ```{r, eval=FALSE} View(iris) ``` You can display and search through variable names and labels with `look_for()`: ```{r} look_for(iris) look_for(iris, "pet") look_for(iris, details = FALSE) ``` ## Value labels The first way to create a labelled vector is to use the `labelled()` function. It's not mandatory to provide a label for each value observed in your vector. You can also provide a label for values not observed. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v ``` Use `val_labels()` to get all value labels and `val_label()` to get the value label associated with a specific value. ```{r} val_labels(v) val_label(v, 8) ``` `val_labels()` could also be used to modify all the value labels attached to a vector, while `val_label()` will update only one specific value label. ```{r} val_labels(v) <- c(yes = 1, nno = 3, bug = 5) v val_label(v, 3) <- "no" v ``` With `val_label()`, you can also add or remove specific value labels. ```{r} val_label(v, 2) <- "maybe" val_label(v, 5) <- NULL v ``` To remove all value labels, use `val_labels()` and `NULL`. The `haven_labelled` class will also be removed. ```{r} val_labels(v) <- NULL v ``` Adding a value label to a non labelled vector will apply `haven_labelled` class to it. ```{r} val_label(v, 1) <- "yes" v ``` Note that applying `val_labels()` to a factor will generate an error! ```{r, error = TRUE} f <- factor(1:3) f val_labels(f) <- c(yes = 1, no = 3) ``` You could also apply `val_labels()` to several columns of a data frame. ```{r} df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1) val_label(df, 1) <- "yes" val_label(df[, c("v1", "v3")], 2) <- "maybe" val_label(df[, c("v2", "v3")], 3) <- "no" val_labels(df) val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3) val_labels(df) val_labels(df) <- NULL val_labels(df) val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3)) val_labels(df) ``` ## Sorting value labels Value labels are sorted by default in the order they have been created. ```{r} v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA) val_label(v, 1) <- "yes" val_label(v, 3) <- "no" val_label(v, 9) <- "refused" val_label(v, 2) <- "maybe" val_label(v, 8) <- "don't know" v ``` It could be useful to reorder the value labels according to their attached values, with `sort_val_labels()`. ```{r} sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) ``` If you prefer, you can also sort them according to the labels. ```{r} sort_val_labels(v, according_to = "l") ``` ## User defined missing values (SPSS's style) `haven` (>= 2.0.0) introduced an additional `haven_labelled_spss` class to deal with user defined missing values. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. It is possible to manipulate this missing values with `na_values()` and `na_range()`. Note that `is.na()` will return `TRUE` as well for user-defined missing values. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` Since version 2.1.0, it is not mandatory to define at least one value label before defining missing values. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 x ``` To convert user defined missing values into `NA`, simply use `user_na_to_na()`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- user_na_to_na(v) v2 ``` You can also remove user missing values definition without converting these values to `NA`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- remove_user_na(v) v2 ``` or ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v na_values(v) <- NULL v ``` ## Other conversion to NA In some cases, values who don't have an attached value label could be considered as missing. `nolabel_to_na()` will convert them to `NA`. ```{r} v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3)) v nolabel_to_na(v) ``` In other cases, a value label is attached only to specific values that corresponds to a missing value. For example: ```{r} size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99)) size ``` In such cases, `val_labels_to_na()` could be appropriate. ```{r} val_labels_to_na(size) ``` These two functions could also be applied to an overall data frame. Only labelled vectors will be impacted. ## Converting to factor A labelled vector could easily be converted to a factor with `to_factor()`. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v to_factor(v) ``` The `levels` argument allows to specify what should be used as the factor levels, i.e. the labels (default), the values or the labels prefixed with values. ```{r} to_factor(v, levels = "v") to_factor(v, levels = "p") ``` The `ordered` argument will create an ordinal factor. ```{r} to_factor(v, ordered = TRUE) ``` The argument `nolabel_to_na` specify if the corresponding function should be applied before converting to a factor. Therefore, the two following commands are equivalent. ```{r} to_factor(v, nolabel_to_na = TRUE) to_factor(nolabel_to_na(v)) ``` `sort_levels` specifies how the levels should be sorted: `"none"` to keep the order in which value labels have been defined, `"values"` to order the levels according to the values and `"labels"` according to the labels. `"auto"` (default) will be equivalent to `"none"` except if some values with no attached labels are found and are not dropped. In that case, `"values"` will be used. ```{r} to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "l") ``` The function `to_labelled()` could be used to turn a factor into a labelled numeric vector. ```{r} f <- factor(1:3, labels = c("a", "b", "c")) to_labelled(f) ``` Note that `to_labelled(to_factor(v))` will not be equal to `v` due to the way factors are stored internally by **R**. ```{r} v to_labelled(to_factor(v)) ``` ## Other type of conversions You can use `to_character()` for converting into a character vector instead of a factor. ```{r} v to_character(v) ``` To remove the `haven_class`, you can simply use `unclass()`. ```{r} unclass(v) ``` Note that value labels will be preserved as an attribute to the vector. ```{r} remove_val_labels(v) ``` To remove value labels, use `remove_val_labels()`. ```{r} remove_val_labels(v) ``` Note that if your vector does have user-defined missing values, you may also want to use `remove_user_na()`. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" x remove_val_labels(x) remove_user_na(x) remove_user_na(x, user_na_to_na = TRUE) remove_val_labels(remove_user_na(x)) unclass(x) ``` You can remove all labels and user-defined missing values with `remove_labels()`. Use `keep_var_label = TRUE` to preserve only variable label. ```{r} remove_labels(x, user_na_to_na = TRUE) remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE) ``` ## Conditional conversion to factors{#unlabelled} For any analysis, it is the responsibility of user to identify which labelled numeric vectors should be considered as **categorical** (and therefore converted into factors using `to_factor()`) and which variables should be treated as **continuous** (and therefore unclassed into numeric using `base::unclass()`). It should be noted that most functions expect categorical variables to be coded as factors. It includes most modeling functions (such as `stats::lm()` or `stats::glm()`) or plotting functions from `ggplot2`. In most of cases, if data documentation was properly done, categorical variables corresponds to vectors where all observed values have a value label while vectors where only few values have a value label should be considered as continuous. In that situation, you could apply the `unlabelled()` method to an overall data frame. By default, `unlabelled()` works as follow: - if a column doesn't inherit the `haven_labelled` class, it will be not affected; - if all observed values have a corresponding value label, the column will be converted into a factor (using `to_factor()`); - otherwise, the column will be unclassed (and converted back to a numeric or character vector by applying `base::unclass()`). ```{r} df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)), d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), e = labelled_spss( c(1, 9, 1, 2), labels = c(No = 1, Yes = 2), na_values = 9 ) ) df %>% look_for() unlabelled(df) %>% look_for() unlabelled(df, user_na_to_na = TRUE) %>% look_for() unlabelled(df, drop_unused_labels = TRUE) %>% look_for() ``` ## Importing labelled data In **haven** package, `read_spss`, `read_stata` and `read_sas` are natively importing data using the `labelled` class and the `label` attribute for variable labels. Functions from **foreign** package could also import some metadata from **SPSS** and **Stata** files. `to_labelled` can convert data imported with **foreign** into a labelled data frame. However, there are some limitations compared to using **haven**: - For **SPSS** files, it will be better to set `use.value.labels = FALSE`, `to.data.frame = FALSE` and `use.missings = FALSE` when calling `read.spss`. If `use.value.labels = TRUE`, variable with value labels will be converted into factors by `read.spss` (and kept as factors by `foreign_to_label`). If `to.data.frame = TRUE`, meta data describing the missing values will not be imported. If `use.missings = TRUE`, missing values would have been converted to `NA` by `read.spss`. - For **Stata** files, set `convert.factors = FALSE` when calling `read.dta` to avoid conversion of variables with value labels into factors. So far, missing values defined in Stata are always imported as `NA` by `read.dta` and could not be retrieved by `foreign_to_labelled`. The **memisc** package provide functions to import variable metadata and store them in specific object of class `data.set`. The `to_labelled` method can convert a data.set into a labelled data frame. ```{r, eval=FALSE} # from foreign library(foreign) df <- to_labelled(read.spss( "file.sav", to.data.frame = FALSE, use.value.labels = FALSE, use.missings = FALSE )) df <- to_labelled(read.dta( "file.dta", convert.factors = FALSE )) # from memisc library(memisc) nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") nes1948 <- spss.portable.file(nes1948.por) df <- to_labelled(nes1948) ds <- as.data.set(nes19480) df <- to_labelled(ds) ``` ## Using labelled with dplyr/magrittr If you are using the `%>%` operator, you can use the functions `set_variable_labels()`, `set_value_labels()`, `add_value_labels()` and `remove_value_labels()`. ```{r} library(dplyr) df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% set_variable_labels(s1 = "Sex", s2 = "Question") %>% set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2)) df$s2 ``` `set_value_labels()` will replace the list of value labels while `add_value_labels()` will update it. ```{r} df <- df %>% set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9)) df$s2 df <- df %>% add_value_labels(s2 = c(No = 2)) df$s2 ``` You can also remove some variable and/or value labels. ```{r} df <- df %>% set_variable_labels(s1 = NULL) # removing one value label df <- df %>% remove_value_labels(s2 = 2) df$s2 # removing several value labels df <- df %>% remove_value_labels(s2 = 8:9) df$s2 # removing all value labels df <- df %>% set_value_labels(s2 = NULL) df$s2 ``` To convert variables, the easiest is to use `unlabelled()`. ```{r} library(questionr) data(fertility) glimpse(women) glimpse(women %>% unlabelled()) ``` Alternatively, you can use functions as `dplyr::mutate()` + `dplyr::across()`. See the example below. ```{r} glimpse(to_factor(women)) glimpse(women %>% mutate(across(where(is.labelled), to_factor))) glimpse(women %>% mutate(across(employed:religion, to_factor))) ``` labelled/inst/doc/missing_values.R0000644000176200001440000000670014737431513016726 0ustar liggesusers## ----------------------------------------------------------------------------- library(labelled) ## ----------------------------------------------------------------------------- x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) ## ----------------------------------------------------------------------------- x is.na(x) ## ----------------------------------------------------------------------------- na_tag(x) print_tagged_na(x) format_tagged_na(x) ## ----------------------------------------------------------------------------- is.na(x) is_tagged_na(x) # You can test for specific tagged NAs with the second argument is_tagged_na(x, "a") is_regular_na(x) ## ----error=TRUE--------------------------------------------------------------- y <- c("a", "b", tagged_na("z")) y is_tagged_na(y) format_tagged_na(y) z <- c(1L, 2L, tagged_na("a")) typeof(z) format_tagged_na(z) ## ----------------------------------------------------------------------------- x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x %>% print_tagged_na() unique(x) %>% print_tagged_na() unique_tagged_na(x) %>% print_tagged_na() duplicated(x) duplicated_tagged_na(x) sort(x, na.last = TRUE) %>% print_tagged_na() sort_tagged_na(x) %>% print_tagged_na() ## ----------------------------------------------------------------------------- x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) x ## ----------------------------------------------------------------------------- to_factor(x) ## ----------------------------------------------------------------------------- to_factor(x, explicit_tagged_na = TRUE) to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE) ## ----------------------------------------------------------------------------- tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) ## ----------------------------------------------------------------------------- tagged_na_to_regular_na(x) tagged_na_to_regular_na(x) %>% is_tagged_na() ## ----------------------------------------------------------------------------- v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 v na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ## ----------------------------------------------------------------------------- library(dplyr) # setting value labels and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% set_value_labels(s2 = c(yes = 1, no = 2)) %>% set_na_values(s2 = 9) df$s2 # removing user NAs df <- df %>% set_na_values(s2 = NULL) df$s2 ## ----------------------------------------------------------------------------- v is.na(v) is_user_na(v) is_regular_na(v) ## ----------------------------------------------------------------------------- x <- c(1:5, 11:15) na_range(x) <- c(10, Inf) val_labels(x) <- c("dk" = 11, "refused" = 15) x mean(x) ## ----------------------------------------------------------------------------- user_na_to_na(x) mean(user_na_to_na(x), na.rm = TRUE) ## ----------------------------------------------------------------------------- user_na_to_tagged_na(x) mean(user_na_to_tagged_na(x), na.rm = TRUE) ## ----------------------------------------------------------------------------- remove_user_na(x) mean(remove_user_na(x)) labelled/inst/doc/look_for.R0000644000176200001440000000440414737431510015504 0ustar liggesusers## ----message=FALSE------------------------------------------------------------ library(dplyr) ## ----------------------------------------------------------------------------- iris %>% as_tibble() ## ----------------------------------------------------------------------------- data(fertility, package = "questionr") women ## ----------------------------------------------------------------------------- glimpse(iris) glimpse(women) ## ----------------------------------------------------------------------------- library(labelled) look_for(iris) look_for(women) ## ----------------------------------------------------------------------------- # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") # Look_for will take variable labels into account look_for(women, "read", "level") ## ----------------------------------------------------------------------------- look_for(women, "read") look_for(women, "read", labels = FALSE) ## ----------------------------------------------------------------------------- look_for(iris, "sepal") look_for(iris, "sepal", ignore.case = FALSE) ## ----------------------------------------------------------------------------- look_for(women, "id", details = "none") ## ----------------------------------------------------------------------------- look_for(women, details = "full") look_for(women, details = "full") %>% dplyr::glimpse() ## ----eval=FALSE--------------------------------------------------------------- # look_for(women) %>% View() ## ----------------------------------------------------------------------------- look_for(women) %>% as_tibble() glimpse(look_for(women)) ## ----------------------------------------------------------------------------- look_for(women) %>% convert_list_columns_to_character() ## ----------------------------------------------------------------------------- look_for(women) %>% lookfor_to_long_format() ## ----------------------------------------------------------------------------- look_for(women) %>% lookfor_to_long_format() %>% convert_list_columns_to_character() labelled/inst/doc/labelled.html0000644000176200001440000050505014737431505016210 0ustar liggesusers Introduction to labelled

Introduction to labelled

Joseph Larmarange

The purpose of the labelled package is to provide functions to manipulate metadata as variable labels, value labels and defined missing values using the haven_labelled and haven_labelled_spss classes introduced in haven package.

These classes allow to add metadata (variable, value labels and SPSS-style missing values) to vectors.

It should be noted that value labels doesn’t imply that your vectors should be considered as categorical or continuous. Therefore, value labels are not intended to be use for data analysis. For example, before performing modeling, you should convert vectors with value labels into factors or into classic numeric/character vectors.

Therefore, two main approaches could be considered.

Two main approaches
Two main approaches

In approach A, haven_labelled vectors are converted into factors or into numeric/character vectors just after data import, using unlabelled(), to_factor() or unclass(). Then, data cleaning, recoding and analysis are performed using classic R vector types.

In approach B, haven_labelled vectors are kept for data cleaning and coding, allowing to preserved original recoding, in particular if data should be reexported after that step. Functions provided by labelled will be useful for managing value labels. However, as in approach A, haven_labelled vectors will have to be converted into classic factors or numeric vectors before data analysis (in particular modeling) as this is the way categorical and continuous variables should be coded for analysis functions.

Variable labels

A variable label could be specified for any vector using var_label().

library(labelled)

var_label(iris$Sepal.Length) <- "Length of sepal"

It’s possible to add a variable label to several columns of a data frame using a named list.

var_label(iris) <- list(
  Petal.Length = "Length of petal",
  Petal.Width = "Width of Petal"
)

To get the variable label, simply call var_label().

var_label(iris$Petal.Width)
## [1] "Width of Petal"
var_label(iris)
## $Sepal.Length
## [1] "Length of sepal"
## 
## $Sepal.Width
## NULL
## 
## $Petal.Length
## [1] "Length of petal"
## 
## $Petal.Width
## [1] "Width of Petal"
## 
## $Species
## NULL

To remove a variable label, use NULL.

var_label(iris$Sepal.Length) <- NULL

In RStudio, variable labels will be displayed in data viewer.

View(iris)

You can display and search through variable names and labels with look_for():

look_for(iris)
##  pos variable     label           col_type missing values    
##  1   Sepal.Length —               dbl      0                 
##  2   Sepal.Width  —               dbl      0                 
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
look_for(iris, "pet")
##  pos variable     label           col_type missing values
##  3   Petal.Length Length of petal dbl      0             
##  4   Petal.Width  Width of Petal  dbl      0
look_for(iris, details = FALSE)
##  pos variable     label          
##  1   Sepal.Length —              
##  2   Sepal.Width  —              
##  3   Petal.Length Length of petal
##  4   Petal.Width  Width of Petal 
##  5   Species      —

Value labels

The first way to create a labelled vector is to use the labelled() function. It’s not mandatory to provide a label for each value observed in your vector. You can also provide a label for values not observed.

v <- labelled(
  c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
  c(yes = 1, no = 3, "don't know" = 8, refused = 9)
)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused

Use val_labels() to get all value labels and val_label() to get the value label associated with a specific value.

val_labels(v)
##        yes         no don't know    refused 
##          1          3          8          9
val_label(v, 8)
## [1] "don't know"

val_labels() could also be used to modify all the value labels attached to a vector, while val_label() will update only one specific value label.

val_labels(v) <- c(yes = 1, nno = 3, bug = 5)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      3   nno
##      5   bug
val_label(v, 3) <- "no"
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      3    no
##      5   bug

With val_label(), you can also add or remove specific value labels.

val_label(v, 2) <- "maybe"
val_label(v, 5) <- NULL
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      3    no
##      2 maybe

To remove all value labels, use val_labels() and NULL. The haven_labelled class will also be removed.

val_labels(v) <- NULL
v
##  [1]  1  2  2  2  3  9  1  3  2 NA

Adding a value label to a non labelled vector will apply haven_labelled class to it.

val_label(v, 1) <- "yes"
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes

Note that applying val_labels() to a factor will generate an error!

f <- factor(1:3)
f
## [1] 1 2 3
## Levels: 1 2 3
val_labels(f) <- c(yes = 1, no = 3)
## Error in `val_labels<-`:
## ! Value labels cannot be applied to factors.

You could also apply val_labels() to several columns of a data frame.

df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1)

val_label(df, 1) <- "yes"
val_label(df[, c("v1", "v3")], 2) <- "maybe"
val_label(df[, c("v2", "v3")], 3) <- "no"
val_labels(df)
## $v1
##   yes maybe 
##     1     2 
## 
## $v2
## yes  no 
##   1   3 
## 
## $v3
##   yes maybe    no 
##     1     2     3
val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3)
val_labels(df)
## $v1
##   YES MAYBE    NO 
##     1     2     3 
## 
## $v2
## yes  no 
##   1   3 
## 
## $v3
##   YES MAYBE    NO 
##     1     2     3
val_labels(df) <- NULL
val_labels(df)
## $v1
## NULL
## 
## $v2
## NULL
## 
## $v3
## NULL
val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3))
val_labels(df)
## $v1
## yes  no 
##   1   3 
## 
## $v2
## a b c 
## 1 2 3 
## 
## $v3
## NULL

Sorting value labels

Value labels are sorted by default in the order they have been created.

v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA)
val_label(v, 1) <- "yes"
val_label(v, 3) <- "no"
val_label(v, 9) <- "refused"
val_label(v, 2) <- "maybe"
val_label(v, 8) <- "don't know"
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9    refused
##      2      maybe
##      8 don't know

It could be useful to reorder the value labels according to their attached values, with sort_val_labels().

sort_val_labels(v)
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      2      maybe
##      3         no
##      8 don't know
##      9    refused
sort_val_labels(v, decreasing = TRUE)
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      9    refused
##      8 don't know
##      3         no
##      2      maybe
##      1        yes

If you prefer, you can also sort them according to the labels.

sort_val_labels(v, according_to = "l")
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      8 don't know
##      2      maybe
##      3         no
##      9    refused
##      1        yes

User defined missing values (SPSS’s style)

haven (>= 2.0.0) introduced an additional haven_labelled_spss class to deal with user defined missing values. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal NA values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into NA if required before analysis. These defined missing values could co-exist with internal NA values.

It is possible to manipulate this missing values with na_values() and na_range(). Note that is.na() will return TRUE as well for user-defined missing values.

v <- labelled(
  c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
  c(yes = 1, no = 3, "don't know" = 9)
)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_values(v) <- 9
na_values(v)
## [1] 9
v
## <labelled_spss<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## Missing values: 9
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
is.na(v)
##  [1] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
na_values(v) <- NULL
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_range(v) <- c(5, Inf)
na_range(v)
## [1]   5 Inf
v
## <labelled_spss<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## Missing range:  [5, Inf]
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know

Since version 2.1.0, it is not mandatory to define at least one value label before defining missing values.

x <- c(1, 2, 2, 9)
na_values(x) <- 9
x
## <labelled_spss<double>[4]>
## [1] 1 2 2 9
## Missing values: 9

To convert user defined missing values into NA, simply use user_na_to_na().

v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
v
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## Missing values: 9, 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad
v2 <- user_na_to_na(v)
v2
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8 NA NA
## 
## Labels:
##  value label
##      1  Good
##      8   Bad

You can also remove user missing values definition without converting these values to NA.

v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
v
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## Missing values: 9, 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad
v2 <- remove_user_na(v)
v2
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad

or

v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
v
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## Missing values: 9, 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad
na_values(v) <- NULL
v
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad

Other conversion to NA

In some cases, values who don’t have an attached value label could be considered as missing. nolabel_to_na() will convert them to NA.

v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3))
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      2 maybe
##      3    no
nolabel_to_na(v)
## <labelled<double>[10]>
##  [1]  1  2  2  2  3 NA  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      2 maybe
##      3    no

In other cases, a value label is attached only to specific values that corresponds to a missing value. For example:

size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99))
size
## <labelled<double>[5]>
## [1]  1.88  1.62  1.78 99.00  1.91
## 
## Labels:
##  value        label
##     99 not measured

In such cases, val_labels_to_na() could be appropriate.

val_labels_to_na(size)
## [1] 1.88 1.62 1.78   NA 1.91

These two functions could also be applied to an overall data frame. Only labelled vectors will be impacted.

Converting to factor

A labelled vector could easily be converted to a factor with to_factor().

v <- labelled(
  c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
  c(yes = 1, no = 3, "don't know" = 8, refused = 9)
)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused
to_factor(v)
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes 2 no don't know refused

The levels argument allows to specify what should be used as the factor levels, i.e. the labels (default), the values or the labels prefixed with values.

to_factor(v, levels = "v")
##  [1] 1    2    2    2    3    9    1    3    2    <NA>
## Levels: 1 2 3 8 9
to_factor(v, levels = "p")
##  [1] [1] yes     [2] 2       [2] 2       [2] 2       [3] no      [9] refused
##  [7] [1] yes     [3] no      [2] 2       <NA>       
## Levels: [1] yes [2] 2 [3] no [8] don't know [9] refused

The ordered argument will create an ordinal factor.

to_factor(v, ordered = TRUE)
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes < 2 < no < don't know < refused

The argument nolabel_to_na specify if the corresponding function should be applied before converting to a factor. Therefore, the two following commands are equivalent.

to_factor(v, nolabel_to_na = TRUE)
##  [1] yes     <NA>    <NA>    <NA>    no      refused yes     no      <NA>   
## [10] <NA>   
## Levels: yes no don't know refused
to_factor(nolabel_to_na(v))
##  [1] yes     <NA>    <NA>    <NA>    no      refused yes     no      <NA>   
## [10] <NA>   
## Levels: yes no don't know refused

sort_levels specifies how the levels should be sorted: "none" to keep the order in which value labels have been defined, "values" to order the levels according to the values and "labels" according to the labels. "auto" (default) will be equivalent to "none" except if some values with no attached labels are found and are not dropped. In that case, "values" will be used.

to_factor(v, sort_levels = "n")
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes no don't know refused 2
to_factor(v, sort_levels = "v")
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes 2 no don't know refused
to_factor(v, sort_levels = "l")
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: 2 don't know no refused yes

The function to_labelled() could be used to turn a factor into a labelled numeric vector.

f <- factor(1:3, labels = c("a", "b", "c"))
to_labelled(f)
## <labelled<double>[3]>
## [1] 1 2 3
## 
## Labels:
##  value label
##      1     a
##      2     b
##      3     c

Note that to_labelled(to_factor(v)) will not be equal to v due to the way factors are stored internally by R.

v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused
to_labelled(to_factor(v))
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  5  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      2          2
##      3         no
##      4 don't know
##      5    refused

Other type of conversions

You can use to_character() for converting into a character vector instead of a factor.

v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused
to_character(v)
##  [1] "yes"     "2"       "2"       "2"       "no"      "refused" "yes"    
##  [8] "no"      "2"       NA

To remove the haven_class, you can simply use unclass().

unclass(v)
##  [1]  1  2  2  2  3  9  1  3  2 NA
## attr(,"labels")
##        yes         no don't know    refused 
##          1          3          8          9

Note that value labels will be preserved as an attribute to the vector.

remove_val_labels(v)
##  [1]  1  2  2  2  3  9  1  3  2 NA

To remove value labels, use remove_val_labels().

remove_val_labels(v)
##  [1]  1  2  2  2  3  9  1  3  2 NA

Note that if your vector does have user-defined missing values, you may also want to use remove_user_na().

x <- c(1, 2, 2, 9)
na_values(x) <- 9
val_labels(x) <- c(yes = 1, no = 2)
var_label(x) <- "A test variable"
x
## <labelled_spss<double>[4]>: A test variable
## [1] 1 2 2 9
## Missing values: 9
## 
## Labels:
##  value label
##      1   yes
##      2    no
remove_val_labels(x)
## <labelled_spss<double>[4]>: A test variable
## [1] 1 2 2 9
## Missing values: 9
remove_user_na(x)
## <labelled<double>[4]>: A test variable
## [1] 1 2 2 9
## 
## Labels:
##  value label
##      1   yes
##      2    no
remove_user_na(x, user_na_to_na = TRUE)
## <labelled<double>[4]>: A test variable
## [1]  1  2  2 NA
## 
## Labels:
##  value label
##      1   yes
##      2    no
remove_val_labels(remove_user_na(x))
## [1] 1 2 2 9
## attr(,"label")
## [1] "A test variable"
unclass(x)
## [1] 1 2 2 9
## attr(,"labels")
## yes  no 
##   1   2 
## attr(,"na_values")
## [1] 9
## attr(,"label")
## [1] "A test variable"

You can remove all labels and user-defined missing values with remove_labels(). Use keep_var_label = TRUE to preserve only variable label.

remove_labels(x, user_na_to_na = TRUE)
## [1]  1  2  2 NA
remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE)
## [1]  1  2  2 NA
## attr(,"label")
## [1] "A test variable"

Conditional conversion to factors

For any analysis, it is the responsibility of user to identify which labelled numeric vectors should be considered as categorical (and therefore converted into factors using to_factor()) and which variables should be treated as continuous (and therefore unclassed into numeric using base::unclass()).

It should be noted that most functions expect categorical variables to be coded as factors. It includes most modeling functions (such as stats::lm() or stats::glm()) or plotting functions from ggplot2.

In most of cases, if data documentation was properly done, categorical variables corresponds to vectors where all observed values have a value label while vectors where only few values have a value label should be considered as continuous.

In that situation, you could apply the unlabelled() method to an overall data frame. By default, unlabelled() works as follow:

  • if a column doesn’t inherit the haven_labelled class, it will be not affected;
  • if all observed values have a corresponding value label, the column will be converted into a factor (using to_factor());
  • otherwise, the column will be unclassed (and converted back to a numeric or character vector by applying base::unclass()).
df <- data.frame(
  a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)),
  b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)),
  c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)),
  d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")),
  e = labelled_spss(
    c(1, 9, 1, 2),
    labels = c(No = 1, Yes = 2),
    na_values = 9
  )
)
df %>% look_for()
##  pos variable label col_type missing values 
##  1   a        —     dbl+lbl  0       [1] No 
##                                      [2] Yes
##  2   b        —     dbl+lbl  0       [1] No 
##                                      [2] Yes
##                                      [3] DK 
##  3   c        —     dbl+lbl  0       [1] No 
##                                      [2] Yes
##                                      [3] DK 
##  4   d        —     chr+lbl  0       [a] No 
##                                      [b] Yes
##  5   e        —     dbl+lbl  1       [1] No 
##                                      [2] Yes
unlabelled(df) %>% look_for()
##  pos variable label col_type missing values
##  1   a        —     dbl      0             
##  2   b        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  3   c        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  4   d        —     chr      0             
##  5   e        —     fct      1       No    
##                                      Yes
unlabelled(df, user_na_to_na = TRUE) %>% look_for()
##  pos variable label col_type missing values
##  1   a        —     dbl      0             
##  2   b        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  3   c        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  4   d        —     chr      0             
##  5   e        —     fct      1       No    
##                                      Yes
unlabelled(df, drop_unused_labels = TRUE) %>% look_for()
##  pos variable label col_type missing values
##  1   a        —     dbl      0             
##  2   b        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  3   c        —     fct      0       No    
##                                      Yes   
##  4   d        —     chr      0             
##  5   e        —     fct      1       No    
##                                      Yes

Importing labelled data

In haven package, read_spss, read_stata and read_sas are natively importing data using the labelled class and the label attribute for variable labels.

Functions from foreign package could also import some metadata from SPSS and Stata files. to_labelled can convert data imported with foreign into a labelled data frame. However, there are some limitations compared to using haven:

  • For SPSS files, it will be better to set use.value.labels = FALSE, to.data.frame = FALSE and use.missings = FALSE when calling read.spss. If use.value.labels = TRUE, variable with value labels will be converted into factors by read.spss (and kept as factors by foreign_to_label). If to.data.frame = TRUE, meta data describing the missing values will not be imported. If use.missings = TRUE, missing values would have been converted to NA by read.spss.
  • For Stata files, set convert.factors = FALSE when calling read.dta to avoid conversion of variables with value labels into factors. So far, missing values defined in Stata are always imported as NA by read.dta and could not be retrieved by foreign_to_labelled.

The memisc package provide functions to import variable metadata and store them in specific object of class data.set. The to_labelled method can convert a data.set into a labelled data frame.

# from foreign
library(foreign)
df <- to_labelled(read.spss(
  "file.sav",
  to.data.frame = FALSE,
  use.value.labels = FALSE,
  use.missings = FALSE
))
df <- to_labelled(read.dta(
  "file.dta",
  convert.factors = FALSE
))

# from memisc
library(memisc)
nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc")
nes1948 <- spss.portable.file(nes1948.por)
df <- to_labelled(nes1948)
ds <- as.data.set(nes19480)
df <- to_labelled(ds)

Using labelled with dplyr/magrittr

If you are using the %>% operator, you can use the functions set_variable_labels(), set_value_labels(), add_value_labels() and remove_value_labels().

library(dplyr)
## 
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>%
  set_variable_labels(s1 = "Sex", s2 = "Question") %>%
  set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2))
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value label
##      1   Yes
##      2    No

set_value_labels() will replace the list of value labels while add_value_labels() will update it.

df <- df %>%
  set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9))
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value      label
##      1        Yes
##      8 Don't know
##      9    Unknown
df <- df %>%
  add_value_labels(s2 = c(No = 2))
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value      label
##      1        Yes
##      8 Don't know
##      9    Unknown
##      2         No

You can also remove some variable and/or value labels.

df <- df %>%
  set_variable_labels(s1 = NULL)

# removing one value label
df <- df %>%
  remove_value_labels(s2 = 2)
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value      label
##      1        Yes
##      8 Don't know
##      9    Unknown
# removing several value labels
df <- df %>%
  remove_value_labels(s2 = 8:9)
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value label
##      1   Yes
# removing all value labels
df <- df %>%
  set_value_labels(s2 = NULL)
df$s2
## [1] 1 1 2
## attr(,"label")
## [1] "Question"

To convert variables, the easiest is to use unlabelled().

library(questionr)
data(fertility)
glimpse(women)
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ region            <dbl+lbl> 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, …
## $ instruction       <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, …
## $ employed          <dbl+lbl> 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ matri             <dbl+lbl> 0, 2, 2, 2, 1, 0, 1, 1, 2, 5, 2, 3, 0, 2, 1, 2, …
## $ religion          <dbl+lbl> 1, 3, 2, 3, 2, 2, 3, 1, 3, 3, 2, 3, 2, 2, 2, 2, …
## $ newspaper         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ radio             <dbl+lbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, …
## $ tv                <dbl+lbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ ideal_nb_children <dbl+lbl>  4,  4,  4,  4,  4,  5, 10,  5,  4,  5,  6, 10, …
## $ test              <dbl+lbl> 0, 9, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, …
glimpse(women %>% unlabelled())
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <fct> rural, rural, rural, rural, rural, rural, rural, rur…
## $ region            <fct> West, West, West, West, West, South, South, South, S…
## $ instruction       <fct> none, none, none, none, primary, none, none, none, n…
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
## $ radio             <fct> no, yes, yes, no, no, yes, yes, no, no, no, yes, yes…
## $ tv                <fct> no, no, no, no, no, yes, no, no, no, no, yes, yes, n…
## $ ideal_nb_children <dbl> 4, 4, 4, 4, 4, 5, 10, 5, 4, 5, 6, 10, 2, 6, 6, 6, 4,…
## $ test              <fct> no, missing, no, no, yes, no, no, no, no, yes, yes, …

Alternatively, you can use functions as dplyr::mutate() + dplyr::across(). See the example below.

glimpse(to_factor(women))
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <fct> rural, rural, rural, rural, rural, rural, rural, rur…
## $ region            <fct> West, West, West, West, West, South, South, South, S…
## $ instruction       <fct> none, none, none, none, primary, none, none, none, n…
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
## $ radio             <fct> no, yes, yes, no, no, yes, yes, no, no, no, yes, yes…
## $ tv                <fct> no, no, no, no, no, yes, no, no, no, no, yes, yes, n…
## $ ideal_nb_children <fct> 4, 4, 4, 4, 4, 5, 10, 5, 4, 5, 6, 10, 2, 6, 6, 6, 4,…
## $ test              <fct> no, missing, no, no, yes, no, no, no, no, yes, yes, …
glimpse(women %>% mutate(across(where(is.labelled), to_factor)))
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <fct> rural, rural, rural, rural, rural, rural, rural, rur…
## $ region            <fct> West, West, West, West, West, South, South, South, S…
## $ instruction       <fct> none, none, none, none, primary, none, none, none, n…
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
## $ radio             <fct> no, yes, yes, no, no, yes, yes, no, no, no, yes, yes…
## $ tv                <fct> no, no, no, no, no, yes, no, no, no, no, yes, yes, n…
## $ ideal_nb_children <fct> 4, 4, 4, 4, 4, 5, 10, 5, 4, 5, 6, 10, 2, 6, 6, 6, 4,…
## $ test              <fct> no, missing, no, no, yes, no, no, no, no, yes, yes, …
glimpse(women %>% mutate(across(employed:religion, to_factor)))
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ region            <dbl+lbl> 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, …
## $ instruction       <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, …
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ radio             <dbl+lbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, …
## $ tv                <dbl+lbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ ideal_nb_children <dbl+lbl>  4,  4,  4,  4,  4,  5, 10,  5,  4,  5,  6, 10, …
## $ test              <dbl+lbl> 0, 9, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, …
labelled/inst/WORDLIST0000644000176200001440000000041214737244525014177 0ustar liggesusersCMD Cheatsheet Codecov DOI Lifecycle RStudio Recode SPSS's Stata briatte cheatsheet df dplyr gmail joseph larmarange magrittr memisc na natively recode recoded recoding roxygen spss tbl tibble tibbles tidyr unclass unclassed untagged labelled/README.md0000644000176200001440000000510414736716451013313 0ustar liggesusers# labelled [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/0.1.0/active.svg)](https://www.repostatus.org/#active) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![R-CMD-check](https://github.com/larmarange/labelled/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/larmarange/labelled/actions/workflows/R-CMD-check.yaml) [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/labelled)](https://cran.r-project.org/package=labelled) [![Downloads](https://cranlogs.r-pkg.org/badges/labelled)](https://cran.r-project.org/package=labelled) [![DOI](https://www.zenodo.org/badge/38772078.svg)](https://zenodo.org/badge/latestdoi/38772078) [![Codecov test coverage](https://codecov.io/gh/larmarange/labelled/graph/badge.svg)](https://app.codecov.io/gh/larmarange/labelled) This package is built on the new classes `haven_labelled` and `haven_labelled_spss` introduced by `haven` package to handle labelled variables imported from SPSS, Stata and SAS. The `labelled` package propose several functions to manipulate such vectors and their metadata: variable labels, value labels and user-defined missing values. ## Installation & Documentation To install **stable version**: ``` install.packages("labelled") ``` Documentation of stable version: To install **development version**: ``` remotes::install_github("larmarange/labelled") ``` Documentation of development version: ## Introduction Read the vignette at ## Cheatsheet [![labelled cheatsheet](https://github.com/larmarange/labelled/raw/main/cheatsheet/labelled_cheatsheet.png)](https://github.com/larmarange/labelled/raw/main/cheatsheet/labelled_cheatsheet.pdf) ## Some general guidelines 1. Functions are intended to support `labelled` metadata structures only. However, `to_labelled()` method allows to convert metadata from **foreign** and **memisc** packages. 2. Functions should, by default, modify metadata only (i.e. classes and attributes), except if explicitly expressed by the user. labelled/build/0000755000176200001440000000000014737431515013127 5ustar liggesuserslabelled/build/vignette.rds0000644000176200001440000000056514737431515015474 0ustar liggesusersRQk0ڹ)d`/ oscilLIRŷKۤ}rzߗ|v ha-ִگcKN9[BKLl)UJLij-)jdC5Y$y"rq$4 (P@c S4 -Q,8j1Wz{7wFL 8P8DX߃!q4<*|$"%[n+2 /A 852tU"үO9ey#cg^شI7p8wfCu2un+>sxN*{>%(z^濬9uE'+`7{qå HB5labelled/man/0000755000176200001440000000000014737244525012606 5ustar liggesuserslabelled/man/update_labelled.Rd0000644000176200001440000000252314357761455016211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/retrocompatibility.R \name{update_labelled} \alias{update_labelled} \alias{update_labelled.labelled} \alias{update_labelled.haven_labelled_spss} \alias{update_labelled.haven_labelled} \alias{update_labelled.data.frame} \title{Update labelled data to last version} \usage{ update_labelled(x) \method{update_labelled}{labelled}(x) \method{update_labelled}{haven_labelled_spss}(x) \method{update_labelled}{haven_labelled}(x) \method{update_labelled}{data.frame}(x) } \arguments{ \item{x}{An object (vector or data.frame) to convert.} } \description{ Labelled data imported with \pkg{haven} version 1.1.2 or before or created with \code{\link[haven:labelled]{haven::labelled()}} version 1.1.0 or before was using "labelled" and "labelled_spss" classes. } \details{ Since version 2.0.0 of these two packages, "haven_labelled" and "haven_labelled_spss" are used instead. Since haven 2.3.0, "haven_labelled" class has been evolving using now \pkg{vctrs} package. \code{update_labelled()} convert labelled vectors from the old to the new classes and to reconstruct all labelled vectors with the last version of the package. } \seealso{ \code{\link[haven:labelled]{haven::labelled()}}, \code{\link[haven:labelled_spss]{haven::labelled_spss()}} } labelled/man/remove_labels.Rd0000644000176200001440000000362514357761455015726 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_labels.R \name{remove_labels} \alias{remove_labels} \alias{remove_var_label} \alias{remove_val_labels} \alias{remove_user_na} \title{Remove variable label, value labels and user defined missing values} \usage{ remove_labels( x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE ) remove_var_label(x) remove_val_labels(x) remove_user_na(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE) } \arguments{ \item{x}{A vector or a data frame.} \item{user_na_to_na}{Convert user defined missing values into \code{NA}?} \item{keep_var_label}{Keep variable label?} \item{user_na_to_tagged_na}{Convert user defined missing values into tagged \code{NA}? It could be applied only to numeric vectors. Note that integer labelled vectors will be converted to double labelled vectors.} } \description{ Use \code{remove_var_label()} to remove variable label, \code{remove_val_labels()} to remove value labels, \code{remove_user_na()} to remove user defined missing values (\emph{na_values} and \emph{na_range}) and \code{remove_labels()} to remove all. } \details{ Be careful with \code{remove_user_na()} and \code{remove_labels()}, user defined missing values will not be automatically converted to \code{NA}, except if you specify \code{user_na_to_na = TRUE}. \code{user_na_to_na(x)} is an equivalent of \code{remove_user_na(x, user_na_to_na = TRUE)}. If you prefer to convert variables with value labels into factors, use \code{\link[=to_factor]{to_factor()}} or use \code{\link[=unlabelled]{unlabelled()}}. } \examples{ x <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) var_label(x) <- "A variable" x remove_labels(x) remove_labels(x, user_na_to_na = TRUE) remove_user_na(x, user_na_to_na = TRUE) remove_user_na(x, user_na_to_tagged_na = TRUE) } labelled/man/val_labels.Rd0000644000176200001440000000773014466735327015214 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/val_labels.R \name{val_labels} \alias{val_labels} \alias{val_labels<-} \alias{val_label} \alias{val_label<-} \alias{get_value_labels} \alias{set_value_labels} \alias{add_value_labels} \alias{remove_value_labels} \title{Get / Set value labels} \usage{ val_labels(x, prefixed = FALSE) val_labels(x, null_action = c("unclass", "labelled")) <- value val_label(x, v, prefixed = FALSE) val_label(x, v, null_action = c("unclass", "labelled")) <- value get_value_labels(x, prefixed = FALSE) set_value_labels( .data, ..., .labels = NA, .strict = TRUE, .null_action = c("unclass", "labelled") ) add_value_labels( .data, ..., .strict = TRUE, .null_action = c("unclass", "labelled") ) remove_value_labels( .data, ..., .strict = TRUE, .null_action = c("unclass", "labelled") ) } \arguments{ \item{x}{A vector or a data.frame} \item{prefixed}{Should labels be prefixed with values?} \item{null_action, .null_action}{for advanced users, if \code{value = NULL}, unclass the vector (default) or force/keep \code{haven_labelled} class (if \code{null_action = "labelled"})} \item{value}{A named vector for \code{val_labels()} (see \code{\link[haven:labelled]{haven::labelled()}}) or a character string for \code{val_label()}. \code{NULL} to remove the labels (except if \code{null_action = "labelled"}). For data frames, it could also be a named list with a vector of value labels per variable.} \item{v}{A single value.} \item{.data}{a data frame or a vector} \item{...}{name-value pairs of value labels (see examples)} \item{.labels}{value labels to be applied to the data.frame, using the same syntax as \code{value} in \code{val_labels(df) <- value}.} \item{.strict}{should an error be returned if some labels doesn't correspond to a column of \code{x}?} } \value{ \code{val_labels()} will return a named vector. \code{val_label()} will return a single character string. \code{set_value_labels()}, \code{add_value_labels()} and \code{remove_value_labels()} will return an updated copy of \code{.data}. } \description{ Get / Set value labels } \note{ \code{get_value_labels()} is identical to \code{val_labels()}. \code{set_value_labels()}, \code{add_value_labels()} and \code{remove_value_labels()} could be used with \pkg{dplyr} syntax. While \code{set_value_labels()} will replace the list of value labels, \code{add_value_labels()} and \code{remove_value_labels()} will update that list (see examples). \code{set_value_labels()} could also be applied to a vector / a data.frame column. In such case, you can provide a vector of value labels using \code{.labels} or several name-value pairs of value labels (see example). Similarly, \code{add_value_labels()} and \code{remove_value_labels()} could also be applied on vectors. } \examples{ v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) val_labels(v) val_labels(v, prefixed = TRUE) val_label(v, 2) val_label(v, 2) <- "maybe" v val_label(v, 9) <- NULL v val_labels(v, null_action = "labelled") <- NULL v val_labels(v) <- NULL v if (require(dplyr)) { # setting value labels df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) \%>\% set_value_labels( s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2) ) val_labels(df) # updating value labels df <- df \%>\% add_value_labels(s2 = c(Unknown = 9)) df$s2 # removing a value labels df <- df \%>\% remove_value_labels(s2 = 9) df$s2 # removing all value labels df <- df \%>\% set_value_labels(s2 = NULL) df$s2 # example on a vector v <- 1:4 v <- set_value_labels(v, min = 1, max = 4) v v \%>\% set_value_labels(middle = 3) v \%>\% set_value_labels(NULL) v \%>\% set_value_labels(.labels = c(a = 1, b = 2, c = 3, d = 4)) v \%>\% add_value_labels(between = 2) v \%>\% remove_value_labels(4) } } labelled/man/update_variable_labels_with.Rd0000644000176200001440000000335714677521125020606 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/update_with.R \name{update_variable_labels_with} \alias{update_variable_labels_with} \alias{update_value_labels_with} \title{Update variable/value labels with a function} \usage{ update_variable_labels_with(.data, .fn, .cols = dplyr::everything(), ...) update_value_labels_with(.data, .fn, .cols = dplyr::everything(), ...) } \arguments{ \item{.data}{A data frame, or data frame extension (e.g. a tibble)} \item{.fn}{A function used to transform the variable/value labels of the selected \code{.cols}.} \item{.cols}{Columns to update; defaults to all columns. Use tidy selection.} \item{...}{additional arguments passed onto \code{.fn}.} } \description{ Update variable/value labels with a function } \details{ For \code{update_variable_labels_with()}, it is possible to access the name of the variable inside \code{.fn} by using \code{names()}, i.e. \code{.fn} receive a named character vector (see example). \code{.fn} can return \code{as.character(NA)} to remove a variable label. } \examples{ df <- iris \%>\% set_variable_labels( Sepal.Length = "Length of sepal", Sepal.Width = "Width of sepal", Petal.Length = "Length of petal", Petal.Width = "Width of petal", Species = "Species" ) df$Species <- to_labelled(df$Species) df \%>\% look_for() df \%>\% update_variable_labels_with(toupper) \%>\% look_for() # accessing variable names with names() df \%>\% update_variable_labels_with(function(x){tolower(names(x))}) \%>\% look_for() df \%>\% update_variable_labels_with(toupper, .cols = dplyr::starts_with("S")) \%>\% look_for() df \%>\% update_value_labels_with(toupper) \%>\% look_for() } labelled/man/to_character.Rd0000644000176200001440000000504214466735327015540 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_character.R \name{to_character} \alias{to_character} \alias{to_character.double} \alias{to_character.haven_labelled} \alias{to_character.data.frame} \title{Convert input to a character vector} \usage{ to_character(x, ...) \method{to_character}{double}(x, explicit_tagged_na = FALSE, ...) \method{to_character}{haven_labelled}( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, ... ) \method{to_character}{data.frame}( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, labelled_only = TRUE, ... ) } \arguments{ \item{x}{Object to coerce to a character vector.} \item{...}{Other arguments passed down to method.} \item{explicit_tagged_na}{should tagged NA be kept?} \item{levels}{What should be used for the factor levels: the labels, the values or labels prefixed with values?} \item{nolabel_to_na}{Should values with no label be converted to \code{NA}?} \item{user_na_to_na}{user defined missing values into NA?} \item{labelled_only}{for a data.frame, convert only labelled variables to factors?} } \description{ By default, \code{to_character()} is a wrapper for \code{\link[base:character]{base::as.character()}}. For labelled vector, to_character allows to specify if value, labels or labels prefixed with values should be used for conversion. } \details{ If some values doesn't have a label, automatic labels will be created, except if \code{nolabel_to_na} is \code{TRUE}. When applied to a data.frame, only labelled vectors are converted by default to character. Use \code{labelled_only = FALSE} to convert all variables to characters. } \examples{ v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) to_character(v) to_character(v, nolabel_to_na = TRUE) to_character(v, "v") to_character(v, "p") df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled( c("a", "a", "b", "c"), labels = c(No = "a", Maybe = "b", Yes = "c") ), d = 1:4, e = factor(c("item1", "item2", "item1", "item2")), f = c("itemA", "itemA", "itemB", "itemB"), stringsAsFactors = FALSE ) if (require(dplyr)) { glimpse(df) glimpse(to_character(df)) glimpse(to_character(df, labelled_only = FALSE)) } } labelled/man/look_for.Rd0000644000176200001440000001234014737244525014707 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lookfor.R \name{look_for} \alias{look_for} \alias{lookfor} \alias{generate_dictionary} \alias{print.look_for} \alias{look_for_and_select} \alias{convert_list_columns_to_character} \alias{lookfor_to_long_format} \title{Look for keywords variable names and descriptions / Create a data dictionary} \source{ Inspired by the \code{lookfor} command in Stata. } \usage{ look_for( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full") ) lookfor( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full") ) generate_dictionary( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full") ) \method{print}{look_for}(x, ...) look_for_and_select( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE ) convert_list_columns_to_character(x) lookfor_to_long_format(x) } \arguments{ \item{data}{a data frame or a survey object} \item{...}{optional list of keywords, a character string (or several character strings), which can be formatted as a regular expression suitable for a \code{\link[base:grep]{base::grep()}} pattern, or a vector of keywords; displays all variables if not specified} \item{labels}{whether or not to search variable labels (descriptions); \code{TRUE} by default} \item{values}{whether or not to search within values (factor levels or value labels); \code{TRUE} by default} \item{ignore.case}{whether or not to make the keywords case sensitive; \code{TRUE} by default (case is ignored during matching)} \item{details}{add details about each variable (full details could be time consuming for big data frames, \code{FALSE} is equivalent to \code{"none"} and \code{TRUE} to \code{"full"})} \item{x}{a tibble returned by \code{look_for()}} } \value{ a tibble data frame featuring the variable position, name and description (if it exists) in the original data frame } \description{ \code{look_for} emulates the \code{lookfor} Stata command in \R. It supports searching into the variable names of regular \R data frames as well as into variable labels descriptions, factor levels and value labels. The command is meant to help users finding variables in large datasets. } \details{ When no keyword is provided, it will produce a data dictionary of the overall data frame. The function looks into the variable names for matches to the keywords. If available, variable labels are included in the search scope. Variable labels of data.frame imported with \pkg{foreign} or \pkg{memisc} packages will also be taken into account (see \code{\link[=to_labelled]{to_labelled()}}). If no keyword is provided, it will return all variables of \code{data}. \code{look_for()}, \code{lookfor()} and \code{generate_dictionary()} are equivalent. By default, results will be summarized when printing. To deactivate default printing, use \code{dplyr::as_tibble()}. \code{lookfor_to_long_format()} could be used to transform results with one row per factor level and per value label. Use \code{convert_list_columns_to_character()} to convert named list columns into character vectors (see examples). \code{look_for_and_select()} is a shortcut for selecting some variables and applying \code{dplyr::select()} to return a data frame with only the selected variables. } \examples{ look_for(iris) # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") iris \%>\% look_for_and_select("s") \%>\% head() # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") look_for(iris, "pet", "sp", "width") look_for(iris, "Pet", "sp", "width", ignore.case = FALSE) # Look_for can search within factor levels or value labels look_for(iris, "vers") # Quicker search without variable details look_for(iris, details = "none") # To obtain more details about each variable look_for(iris, details = "full") # To deactivate default printing, convert to tibble look_for(iris, details = "full") \%>\% dplyr::as_tibble() # To convert named lists into character vectors look_for(iris) \%>\% convert_list_columns_to_character() # Long format with one row per factor and per value label look_for(iris) \%>\% lookfor_to_long_format() # Both functions can be combined look_for(iris) \%>\% lookfor_to_long_format() \%>\% convert_list_columns_to_character() # Labelled data d <- dplyr::tibble( region = labelled_spss( c(1, 2, 1, 9, 2, 3), c(north = 1, south = 2, center = 3, missing = 9), na_values = 9, label = "Region of the respondent" ), sex = labelled( c("f", "f", "m", "m", "m", "f"), c(female = "f", male = "m"), label = "Sex of the respondent" ) ) look_for(d) d \%>\% look_for() \%>\% lookfor_to_long_format() \%>\% convert_list_columns_to_character() } \seealso{ \code{vignette("look_for")} } \author{ François Briatte \href{mailto:f.briatte@gmail.com}{f.briatte@gmail.com}, Joseph Larmarange \href{mailto:joseph@larmarange.net}{joseph@larmarange.net} } labelled/man/na_values.Rd0000644000176200001440000001143014466735327015055 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/na_values.R \name{na_values} \alias{na_values} \alias{na_values<-} \alias{na_range} \alias{na_range<-} \alias{get_na_values} \alias{get_na_range} \alias{set_na_values} \alias{set_na_range} \alias{is_user_na} \alias{is_regular_na} \alias{user_na_to_na} \alias{user_na_to_regular_na} \alias{user_na_to_tagged_na} \title{Get / Set SPSS missing values} \usage{ na_values(x) na_values(x) <- value na_range(x) na_range(x) <- value get_na_values(x) get_na_range(x) set_na_values(.data, ..., .values = NA, .strict = TRUE) set_na_range(.data, ..., .values = NA, .strict = TRUE) is_user_na(x) is_regular_na(x) user_na_to_na(x) user_na_to_regular_na(x) user_na_to_tagged_na(x) } \arguments{ \item{x}{A vector (or a data frame).} \item{value}{A vector of values that should also be considered as missing (for \code{na_values}) or a numeric vector of length two giving the (inclusive) extents of the range (for \code{na_values}, use \code{-Inf} and \code{Inf} if you want the range to be open ended).} \item{.data}{a data frame or a vector} \item{...}{name-value pairs of missing values (see examples)} \item{.values}{missing values to be applied to the data.frame, using the same syntax as \code{value} in \code{na_values(df) <- value} or \code{na_range(df) <- value}.} \item{.strict}{should an error be returned if some labels doesn't correspond to a column of \code{x}?} } \value{ \code{na_values()} will return a vector of values that should also be considered as missing. \code{na_range()} will return a numeric vector of length two giving the (inclusive) extents of the range. \code{set_na_values()} and \code{set_na_range()} will return an updated copy of \code{.data}. } \description{ Get / Set SPSS missing values } \details{ See \code{\link[haven:labelled_spss]{haven::labelled_spss()}} for a presentation of SPSS's user defined missing values. Note that \code{\link[base:NA]{base::is.na()}} will return \code{TRUE} for user defined missing values. It will also return \code{TRUE} for regular \code{NA} values. If you want to test if a specific value is a user NA but not a regular \code{NA}, use \code{is_user_na()}. If you want to test if a value is a regular \code{NA} but not a user NA, not a tagged NA, use \code{is_regular_na()}. You can use \code{\link[=user_na_to_na]{user_na_to_na()}} to convert user defined missing values to regular \code{NA}. Note that any value label attached to a user defined missing value will be lost. \code{\link[=user_na_to_regular_na]{user_na_to_regular_na()}} is a synonym of \code{\link[=user_na_to_na]{user_na_to_na()}}. The method \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} will convert user defined missing values into \code{\link[haven:tagged_na]{haven::tagged_na()}}, preserving value labels. Please note that \code{\link[haven:tagged_na]{haven::tagged_na()}} are defined only for double vectors. Therefore, integer \code{haven_labelled_spss} vectors will be converted into double \code{haven_labelled} vectors; and \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} cannot be applied to a character \code{haven_labelled_spss} vector. \code{\link[=tagged_na_to_user_na]{tagged_na_to_user_na()}} is the opposite of \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} and convert tagged \code{NA} into user defined missing values. } \note{ \code{get_na_values()} is identical to \code{na_values()} and \code{get_na_range()} to \code{na_range()}. \code{set_na_values()} and \code{set_na_range()} could be used with \pkg{dplyr} syntax. } \examples{ v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) # TRUE for the 6th and 10th values is_user_na(v) # TRUE only for the 6th value user_na_to_na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v user_na_to_na(v) user_na_to_tagged_na(v) # it is not recommended to mix user NAs and tagged NAs x <- c(NA, 9, tagged_na("a")) na_values(x) <- 9 x is.na(x) is_user_na(x) is_tagged_na(x) is_regular_na(x) if (require(dplyr)) { # setting value label and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) \%>\% set_value_labels(s2 = c(yes = 1, no = 2)) \%>\% set_na_values(s2 = 9) na_values(df) # removing missing values df <- df \%>\% set_na_values(s2 = NULL) df$s2 # example with a vector v <- 1:10 v <- v \%>\% set_na_values(5, 6, 7) v v \%>\% set_na_range(8, 10) v \%>\% set_na_range(.values = c(9, 10)) v \%>\% set_na_values(NULL) } } \seealso{ \code{\link[haven:labelled_spss]{haven::labelled_spss()}}, \code{\link[=user_na_to_na]{user_na_to_na()}} } labelled/man/drop_unused_value_labels.Rd0000644000176200001440000000075614357761455020156 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/drop_unused_value_labels.R \name{drop_unused_value_labels} \alias{drop_unused_value_labels} \title{Drop unused value labels} \usage{ drop_unused_value_labels(x) } \arguments{ \item{x}{A vector or a data frame.} } \description{ Drop value labels associated to a value not present in the data. } \examples{ x <- labelled(c(1, 2, 2, 1), c(yes = 1, no = 2, maybe = 3)) x drop_unused_value_labels(x) } labelled/man/recode_if.Rd0000644000176200001440000000234514466735327015024 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode_if.R \name{recode_if} \alias{recode_if} \title{Recode some values based on condition} \usage{ recode_if(x, condition, true) } \arguments{ \item{x}{vector to be recoded} \item{condition}{logical vector of same length as \code{x}} \item{true}{values to use for \code{TRUE} values of \code{condition}. It must be either the same length as \code{x}, or length 1.} } \value{ Returns \code{x} with values replaced by \code{true} when \code{condition} is \code{TRUE} and unchanged when \code{condition} is \code{FALSE} or \code{NA}. Variable and value labels are preserved unchanged. } \description{ Recode some values based on condition } \examples{ v <- labelled(c(1, 2, 2, 9), c(yes = 1, no = 2)) v \%>\% recode_if(v == 9, NA) if (require(dplyr)) { df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 2, 1)) \%>\% set_value_labels( s1 = c(Male = "M", Female = "F"), s2 = c(A = 1, B = 2) ) \%>\% set_variable_labels(s1 = "Gender", s2 = "Group") df <- df \%>\% mutate( s3 = s2 \%>\% recode_if(s1 == "F", 2), s4 = s2 \%>\% recode_if(s1 == "M", s2 + 10) ) df df \%>\% look_for() } } labelled/man/copy_labels.Rd0000644000176200001440000000361714466735327015404 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy_labels.R \name{copy_labels} \alias{copy_labels} \alias{copy_labels_from} \title{Copy variable and value labels and SPSS-style missing value} \usage{ copy_labels(from, to, .strict = TRUE) copy_labels_from(to, from, .strict = TRUE) } \arguments{ \item{from}{A vector or a data.frame (or tibble) to copy labels from.} \item{to}{A vector or data.frame (or tibble) to copy labels to.} \item{.strict}{When \code{from} is a labelled vector, \code{to} have to be of the same type (numeric or character) in order to copy value labels and SPSS-style missing values. If this is not the case and \code{.strict = TRUE}, an error will be produced. If \code{.strict = FALSE}, only variable label will be copied.} } \description{ This function copies variable and value labels (including missing values) from one vector to another or from one data frame to another data frame. For data frame, labels are copied according to variable names, and only if variables are the same type in both data frames. } \details{ Some base \R functions like \code{\link[base:subset]{base::subset()}} drop variable and value labels attached to a variable. \code{copy_labels} could be used to restore these attributes. \code{copy_labels_from} is intended to be used with \pkg{dplyr} syntax, see examples. } \examples{ library(dplyr) df <- tibble( id = 1:3, happy = factor(c("yes", "no", "yes")), gender = labelled(c(1, 1, 2), c(female = 1, male = 2)) ) \%>\% set_variable_labels( id = "Individual ID", happy = "Are you happy?", gender = "Gender of respondent" ) var_label(df) fdf <- df \%>\% filter(id < 3) var_label(fdf) # some variable labels have been lost fdf <- fdf \%>\% copy_labels_from(df) var_label(fdf) # Alternative syntax fdf <- subset(df, id < 3) fdf <- copy_labels(from = df, to = fdf) } labelled/man/recode.haven_labelled.Rd0000644000176200001440000000675014737244525017272 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R \name{recode.haven_labelled} \alias{recode.haven_labelled} \title{Recode values} \usage{ \method{recode}{haven_labelled}( .x, ..., .default = NULL, .missing = NULL, .keep_value_labels = TRUE, .combine_value_labels = FALSE, .sep = " / " ) } \arguments{ \item{.x}{A vector to modify} \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Replacements. For character and factor \code{.x}, these should be named and replacement is based only on their name. For numeric \code{.x}, these can be named or not. If not named, the replacement is done based on position i.e. \code{.x} represents positions to look for in replacements. See examples. When named, the argument names should be the current values to be replaced, and the argument values should be the new (replacement) values. All replacements must be the same type, and must have either length one or the same length as \code{.x}.} \item{.default}{If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in \code{.x}, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with \code{NA}. \code{.default} must be either length 1 or the same length as \code{.x}.} \item{.missing}{If supplied, any missing values in \code{.x} will be replaced by this value. Must be either length 1 or the same length as \code{.x}.} \item{.keep_value_labels}{If \code{TRUE}, keep original value labels. If \code{FALSE}, remove value labels.} \item{.combine_value_labels}{If \code{TRUE}, will combine original value labels to generate new value labels. Note that unexpected results could be obtained if a same old value is recoded into several different new values.} \item{.sep}{Separator to be used when combining value labels.} } \description{ Extend \code{\link[dplyr:recode]{dplyr::recode()}} method from \pkg{dplyr} to works with labelled vectors. } \examples{ x <- labelled(1:3, c(yes = 1, no = 2)) x dplyr::recode(x, `3` = 2L) # do not keep value labels dplyr::recode(x, `3` = 2L, .keep_value_labels = FALSE) # be careful, changes are not of the same type (here integers), # NA arecreated dplyr::recode(x, `3` = 2) # except if you provide .default or new values for all old values dplyr::recode(x, `1` = 1, `2` = 1, `3` = 2) # if you change the type of the vector (here transformed into character) # value labels are lost dplyr::recode(x, `3` = "b", .default = "a") # use .keep_value_labels = FALSE to avoid a warning dplyr::recode(x, `3` = "b", .default = "a", .keep_value_labels = FALSE) # combine value labels x <- labelled( 1:4, c( "strongly agree" = 1, "agree" = 2, "disagree" = 3, "strongly disagree" = 4 ) ) dplyr::recode( x, `1` = 1L, `2` = 1L, `3` = 2L, `4` = 2L, .combine_value_labels = TRUE ) dplyr::recode( x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE ) dplyr::recode( x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE, .sep = " or " ) dplyr::recode( x, `2` = 1L, .default = 2L, .combine_value_labels = TRUE ) # example when combining some values without a label y <- labelled(1:4, c("strongly agree" = 1)) dplyr::recode(y, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) } \seealso{ \code{\link[dplyr:recode]{dplyr::recode()}} } labelled/man/reexports.Rd0000644000176200001440000000175014357761455015137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/labelled.R, R/tagged_na.R \docType{import} \name{reexports} \alias{reexports} \alias{labelled} \alias{is.labelled} \alias{labelled_spss} \alias{print_labels} \alias{\%>\%} \alias{tagged_na} \alias{na_tag} \alias{is_tagged_na} \alias{format_tagged_na} \alias{print_tagged_na} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} \item{haven}{\code{\link[haven:tagged_na]{format_tagged_na}}, \code{\link[haven:labelled]{is.labelled}}, \code{\link[haven:tagged_na]{is_tagged_na}}, \code{\link[haven]{labelled}}, \code{\link[haven]{labelled_spss}}, \code{\link[haven:tagged_na]{na_tag}}, \code{\link[haven]{print_labels}}, \code{\link[haven:tagged_na]{print_tagged_na}}, \code{\link[haven]{tagged_na}}} }} labelled/man/names_prefixed_by_values.Rd0000644000176200001440000000126414357761455020146 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/val_labels.R \name{names_prefixed_by_values} \alias{names_prefixed_by_values} \title{Turn a named vector into a vector of names prefixed by values} \usage{ names_prefixed_by_values(x) } \arguments{ \item{x}{vector to be prefixed} } \description{ Turn a named vector into a vector of names prefixed by values } \examples{ df <- dplyr::tibble( c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")), c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2)) ) val_labels(df$c1) val_labels(df$c1) \%>\% names_prefixed_by_values() val_labels(df) val_labels(df) \%>\% names_prefixed_by_values() } labelled/man/figures/0000755000176200001440000000000014736716451014253 5ustar liggesuserslabelled/man/figures/lifecycle-questioning.svg0000644000176200001440000000171414357761455021304 0ustar liggesuserslifecyclelifecyclequestioningquestioning labelled/man/figures/lifecycle-stable.svg0000644000176200001440000000167414357761455020216 0ustar liggesuserslifecyclelifecyclestablestable labelled/man/figures/logo.svg0000644000176200001440000003237114736716451015742 0ustar liggesusers image/svg+xml RStudio_Hex 2016 v7 outlines RStudio_Hex 2016 v7 outlines labelled/man/figures/lifecycle-experimental.svg0000644000176200001440000000171614357761455021436 0ustar liggesuserslifecyclelifecycleexperimentalexperimental labelled/man/figures/lifecycle-deprecated.svg0000644000176200001440000000171214357761455021035 0ustar liggesuserslifecyclelifecycledeprecateddeprecated labelled/man/figures/lifecycle-superseded.svg0000644000176200001440000000171314357761455021101 0ustar liggesusers lifecyclelifecyclesupersededsuperseded labelled/man/figures/logo.png0000644000176200001440000002545014736716451015727 0ustar liggesusersPNG  IHDR6sBIT|d pHYsi> tEXtSoftwarewww.inkscape.org<"tEXtTitleRStudio_Hex 2016 v7 outlines}p} IDATxwxǿgfw%TH B "X.U@h P,WV:UObL4^B$lߙc%$;[|'3ov{}X" SgtYǎ-1Y iiYc,пv%G10g-gYpDb3ftDuS#sD @i- BQ uBEd.iBBaʍR3L5$S*-As 8}գZm<Cu'LYԘ6YJb1WĢ? % (16,%\\Z}mq1D BJt jJvIxY%D6)c(KzԶT8i8}5#8!_?Xд)Q'(Ro:IP| zg6fʓVeI%v(@U!jEAFpz-@%WTú@"EMgM@jjԄTacGPpj$A(;BЊ:!}0JR\'-)jBt2D+8bޖ`HĹ%Ywް'D3jV"%4RULm*j=HV]i-FЈo%'*@¤АPVQ =ޖY57:9.YRCP7s7{Ք^r[ daqΊuh%ZfާMRR&A1TzZ-rܒ}mWs*Eav%OJB)ǃ@*?Y8xr %Dx"OZ.)%UIʽﶶVsu32ǁҥ+g%Ay^z,l-%/3{ VZ=dq-:%DaPml5`{> A0Z^MgMeAp@/#4D 14 5 wJfONuѽC4Y9aX47zʪؾN^@)y8_C; ])fWDѳ (m$4Lva*x*LUޭZT 9.}:ah$Ꟃɱ;q }s+.JH$G_x 蚀=Q\.xOGl=)qQpss1lI۞'%}TrhATʥE܅. 3 钀){?U@EBn%x Lݰ ? BF0B S!f}tR#l®O:vj"@ U*`/OXS- %Q*qPB@~+ !?0/Wt腚]EԉzyzԶTśeK(lX4=:8?[Ps/Z)[c`In& QQm`Blӛ՛pڈ*=ʪ գJK%U/ro.hVO^+y;V]쬶$3( zQW]=x9~< D!~)?%c@ƿ& L*WeU(ֈj#*uFTT"&Pɔ2P v~P3{|`I(iА) )uDUgbͷA)0g2Ps"GoLA9N_*éKe8} %UX\+:A:OK\+蛾zq*Oy=GrѓUB!pfHm' g.V}DX:}2:W غ r8uԃkEpF-Y/9Nd~ycǎalƌnD o)R[bk$urSbYx{*yi|9tV!hQ\J ts|,^*YՎD,؞!*CAfOD^o>qCbIjeVjVՔVI+q%GAƧũDi+ EP@E5Z$Rwu6RE3'#"xzXsgP4<QRrVfIzq[lBjo̠U(!jlG-Xq<nIbf= !*ۜIiQ5tRV_;,zI AHq+֤ԶT#׹p=vk-b5BҊjc3(f4ew2&T hY O`KH^4G"n{O\rĘp uzᬶI(B@Y&EM96/Pp +a!D)1k{@|T(a J9v#%>kAMúcWp-!SؖohaQV4#sSL C )[tIl{ƺ~3s8}u/LڧnGr|F61_: ofFz4"q`<x[pFla#3x\3'[_lh}u Mš8k_n|TM"1%*prG60a <{; }ig䴈Mû#fnր.R+lxNJI0a{NuA֭;D7 c~ |\bNo6ņ-nJo{Ndv]fɄgCm"=){$6y\8z rJp\_ŒmWĊ9pnBI/G`n0q2*6Ql#5djp ߆ߍ9K*t}2mA_(}D%EcaQz0Q.H[nr[з/Z񿝹pY&:a/<{oÜIW0a;D]K[ꣴR;_.o趠/Tb=xng| &j'>vbI%n_NgIł{R%=<`v'niA„ d߉Kt*`o?q>m b&vjEP V}KzN{P AңC ym>< ozV1W#[\еDqnA7b75R3QlA SBĹC߱s n[O룙#ŗFC%᭰[Bg q \y &jx*l&惭|!x:bq%&-\ׇ)(ַY2lݍ>i:o}݌=q :c(R;q s\.%mAME?[Ĥxu6lm3p7AhIa3A;DCZBL0Q6k@s t䋟=H[y?Fl&a36{0Q760Q7 ZL͈'fu3#ELОD#l&han!6w0Q =L-L]a3A&>vzq 0Qji~0&jFD:A5#`fLԌt0Q3&jFD:A5#`fLԌt0Q32wGc+~;_:o/@|MѿK<uKoW 92vFr\}9[,66Ű^;9־}b)v^l[ZLԓ`-CmWQFj[ (Z^t ۛww)noyvѽaDFD:A5#`fV 9PEJ"! T!޿Z!_U)G"R83[fH ØA]0G"kD#N~=UhR]3nGFJ-\^Ǐ{NaWpܭbĤeD{?&{ccQ߶#[ ('&#zclhbwE|sԭX}pdN B\p_z+? I_qgZ_(uaHйc?=_txe X&SSv>̟$$ńcrj_%?R7|j8^DY)xxp5~k?pRY^DU7;rX #fx'ou3?k3ڞ=y}R`Öo/Bx5-<֡h^ヤ&!'RO:v*gIENqlžvC?"ތ[oDl:.@cT'd|/Ŷ?`|ݼ6)(>q6>xM}<5N\,[A<>q8^{j;%DanaI>9凮GD ~~Ke9tޛ.M7Z!cAoĖg~<=b0Irx5[/}j @ώ1) WP x.* vaߠn{ ]GTH눞۷\@~\u]h@z)5wa tt-Uz>ڼϾ+9uRwh?;]ˉ9ZuSEi`NJ wmCH)~{an 7 氽ys|q}:V˱sP #tp%ݏ~); LB .`|PxaֽePwǻ}]"T۱kkF:WgxR#+KF " EW_SQ@s8xlySGݷ gׂ{RniOkzmG[R4ֽ0ݒ9W#g@)ux8"ūwJlBKZQǮ!J)$EVʱ~TtT} VڳXcvTŕj?JVNzP TVFyGU#ԝ;&B}cd3ТAЇbu-~?wm qJ$y TZ=\%U+@)p۷ k##zwt軽"h#coǑ nߕvÎs#g #C%u]4m1qú{dCxn]8!M)?fG;2;GvNEXtbr9(MIQR$te'婻FU6clGztq̣:+#zdc-CYP7 /=8ḟrJ+cmBO&s凮LJOҎzI9lw z':ЙB9lX4<ةax֊1Ow/rրKgluS1,+J!CX8K*1U&﫟h2]7!}G/r,\~)23q\1 f b"4kNx~SEۨ~)i*[}_=`+3ciڅ;9d`sE0Yؿ?J'qt9b#5عqu34Jˑ&ß\%Drb#: SOz99wOD8p@}.9%mWMڗ. 狟;AT`xH};9܁;W=tW U_ȏ{N:ۅj˵Ƚ U)pmH̝ivT;_h:}*hEmu:h1Qk xnNoS.B)߿]S{O\>g㸋ؔ} 7-"pӟ?.(bOޞ|L+̈́2 o7HHOMW2yMs b I p@ dNI&t2D\iir5Du@zNÏ* &i~3(ƍn }pFsԅOةRQiJAzpj"m V6+,/^ٲïⱺb3ft ZDnQRmb5+AFB@n_g8':z9,44 %>=s"FHҎ*a" j{RB|,,YLžɡԫeA: bփ", j$ Sqw_^ߏ6_8&un{֗A#Tqa 5 o(bWHKmSx`Dz>k GL{-e%Gkf)!:B T`p/hzԢF ^6r0p͡vA,*-mPDRFf@}RڱWC,&[Njj`3aNю5gcSYbH!i]\*W@T&'0<Ԥ!1 8[}%\\Z}<XG$K6(js#CD9·T[ QA{-ڶ?P|sYk-`LU=ϓwAἈrc6KPܢGbxQ9PRҎ*UV;Bp+70j{V \(!M6BM" m7^7-?:@OC5޼-+7"0hW8V ^%@<)qUҨ- WIJE)zQ-(޺V}ec2ХHҎ2[]`*!PiintNюUaWsЪE]K|zD,v.WZ*yFj.98j--*VՓU%~^zI(@9qjI~hW51f%8&%>H82z\ lx:U>Ъ.g [`vWԬ)wbL lifecyclelifecyclearchivedarchived labelled/man/figures/lifecycle-defunct.svg0000644000176200001440000000170414357761455020366 0ustar liggesuserslifecyclelifecycledefunctdefunct labelled/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172614357761455022013 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated labelled/man/figures/lifecycle-maturing.svg0000644000176200001440000000170614357761455020566 0ustar liggesuserslifecyclelifecyclematuringmaturing labelled/man/to_factor.Rd0000644000176200001440000001157614466735327015073 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_factor.R \name{to_factor} \alias{to_factor} \alias{to_factor.haven_labelled} \alias{to_factor.data.frame} \alias{unlabelled} \title{Convert input to a factor.} \usage{ to_factor(x, ...) \method{to_factor}{haven_labelled}( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, drop_unused_labels = FALSE, user_na_to_na = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ... ) \method{to_factor}{data.frame}( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, labelled_only = TRUE, drop_unused_labels = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ... ) unlabelled(x, ...) } \arguments{ \item{x}{Object to coerce to a factor.} \item{...}{Other arguments passed down to method.} \item{levels}{What should be used for the factor levels: the labels, the values or labels prefixed with values?} \item{ordered}{\code{TRUE} for ordinal factors, \code{FALSE} (default) for nominal factors.} \item{nolabel_to_na}{Should values with no label be converted to \code{NA}?} \item{sort_levels}{How the factor levels should be sorted? (see Details)} \item{decreasing}{Should levels be sorted in decreasing order?} \item{drop_unused_labels}{Should unused value labels be dropped? (applied only if \code{strict = FALSE})} \item{user_na_to_na}{Convert user defined missing values into \code{NA}?} \item{strict}{Convert to factor only if all values have a defined label?} \item{unclass}{If not converted to a factor (when \code{strict = TRUE}), convert to a character or a numeric factor by applying \code{\link[base:class]{base::unclass()}}?} \item{explicit_tagged_na}{Should tagged NA (cf. \code{\link[haven:tagged_na]{haven::tagged_na()}}) be kept as explicit factor levels?} \item{labelled_only}{for a data.frame, convert only labelled variables to factors?} } \description{ The base function \code{\link[base:factor]{base::as.factor()}} is not a generic, but this variant is. By default, \code{to_factor()} is a wrapper for \code{\link[base:factor]{base::as.factor()}}. Please note that \code{to_factor()} differs slightly from \code{\link[haven:as_factor]{haven::as_factor()}} method provided by \pkg{haven} package. \code{unlabelled(x)} is a shortcut for \code{to_factor(x, strict = TRUE, unclass = TRUE, labelled_only = TRUE)}. } \details{ If some values doesn't have a label, automatic labels will be created, except if \code{nolabel_to_na} is \code{TRUE}. If \code{sort_levels == 'values'}, the levels will be sorted according to the values of \code{x}. If \code{sort_levels == 'labels'}, the levels will be sorted according to labels' names. If \code{sort_levels == 'none'}, the levels will be in the order the value labels are defined in \code{x}. If some labels are automatically created, they will be added at the end. If \code{sort_levels == 'auto'}, \code{sort_levels == 'none'} will be used, except if some values doesn't have a defined label. In such case, \code{sort_levels == 'values'} will be applied. When applied to a data.frame, only labelled vectors are converted by default to a factor. Use \code{labelled_only = FALSE} to convert all variables to factors. \code{unlabelled()} is a shortcut for quickly removing value labels of a vector or of a data.frame. If all observed values have a value label, then the vector will be converted into a factor. Otherwise, the vector will be unclassed. If you want to remove value labels in all cases, use \code{\link[=remove_val_labels]{remove_val_labels()}}. } \examples{ v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) to_factor(v) to_factor(v, nolabel_to_na = TRUE) to_factor(v, "p") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "l") x <- labelled(c("H", "M", "H", "L"), c(low = "L", medium = "M", high = "H")) to_factor(x, ordered = TRUE) # Strict conversion v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) to_factor(v) to_factor(v, strict = TRUE) # Not converted because 3 does not have a label to_factor(v, strict = TRUE, unclass = TRUE) df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled( c("a", "a", "b", "c"), labels = c(No = "a", Maybe = "b", Yes = "c") ), d = 1:4, e = factor(c("item1", "item2", "item1", "item2")), f = c("itemA", "itemA", "itemB", "itemB"), stringsAsFactors = FALSE ) if (require(dplyr)) { glimpse(df) glimpse(unlabelled(df)) } } labelled/man/remove_attributes.Rd0000644000176200001440000000130614466735327016644 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_attributes.R \name{remove_attributes} \alias{remove_attributes} \title{Remove attributes} \usage{ remove_attributes(x, attributes) } \arguments{ \item{x}{an object} \item{attributes}{a character vector indicating attributes to remove} } \description{ This function removes specified attributes. When applied to a data.frame, it will also remove recursively the specified attributes to each column of the data.frame. } \examples{ \dontrun{ library(haven) path <- system.file("examples", "iris.sav", package = "haven") d <- read_sav(path) str(d) d <- remove_attributes(d, "format.spss") str(d) } } labelled/man/nolabel_to_na.Rd0000644000176200001440000000065714357761455015705 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_na.R \name{nolabel_to_na} \alias{nolabel_to_na} \title{Recode values with no label to NA} \usage{ nolabel_to_na(x) } \arguments{ \item{x}{Object to recode.} } \description{ For labelled variables, values with no label will be recoded to \code{NA}. } \examples{ v <- labelled(c(1, 2, 9, 1, 9), c(yes = 1, no = 2)) nolabel_to_na(v) } labelled/man/tagged_na_to_user_na.Rd0000644000176200001440000000246514357761455017237 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tagged_na.R \name{tagged_na_to_user_na} \alias{tagged_na_to_user_na} \alias{tagged_na_to_regular_na} \title{Convert tagged NAs into user NAs} \usage{ tagged_na_to_user_na(x, user_na_start = NULL) tagged_na_to_regular_na(x) } \arguments{ \item{x}{a vector or a data frame} \item{user_na_start}{minimum value of the new user na, if \code{NULL}, computed automatically (maximum of observed values + 1)} } \description{ \code{\link[=tagged_na_to_user_na]{tagged_na_to_user_na()}} is the opposite of \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} and convert tagged \code{NA} into user defined missing values (see \code{\link[=labelled_spss]{labelled_spss()}}). } \details{ \code{\link[=tagged_na_to_regular_na]{tagged_na_to_regular_na()}} converts tagged NAs into regular NAs. } \examples{ x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) x print_tagged_na(x) tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) y <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d")) val_labels(y) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) y tagged_na_to_user_na(y, user_na_start = 8) tagged_na_to_regular_na(y) tagged_na_to_regular_na(y) \%>\% is_tagged_na() } labelled/man/sort_val_labels.Rd0000644000176200001440000000125214466735327016254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/val_labels.R \name{sort_val_labels} \alias{sort_val_labels} \title{Sort value labels} \usage{ sort_val_labels(x, according_to = c("values", "labels"), decreasing = FALSE) } \arguments{ \item{x}{A labelled vector or a data.frame} \item{according_to}{According to values or to labels?} \item{decreasing}{In decreasing order?} } \description{ Sort value labels according to values or to labels } \examples{ v <- labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3)) v sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) sort_val_labels(v, "l") sort_val_labels(v, "l", TRUE) } labelled/man/var_label.Rd0000644000176200001440000001136414737243033015022 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/var_label.R \name{var_label} \alias{var_label} \alias{var_label.data.frame} \alias{var_label<-} \alias{get_variable_labels} \alias{set_variable_labels} \alias{label_attribute} \alias{get_label_attribute} \alias{set_label_attribute} \alias{label_attribute<-} \title{Get / Set a variable label} \usage{ var_label(x, ...) \method{var_label}{data.frame}( x, unlist = FALSE, null_action = c("keep", "fill", "skip", "na", "empty"), recurse = FALSE, ... ) var_label(x) <- value get_variable_labels(x, ...) set_variable_labels(.data, ..., .labels = NA, .strict = TRUE) label_attribute(x) get_label_attribute(x) set_label_attribute(x, value) label_attribute(x) <- value } \arguments{ \item{x}{a vector or a data.frame} \item{...}{name-value pairs of variable labels (see examples)} \item{unlist}{for data frames, return a named vector instead of a list} \item{null_action}{for data frames, by default \code{NULL} will be returned for columns with no variable label. Use \code{"fill"} to populate with the column name instead, \code{"skip"} to remove such values from the returned list, \code{"na"} to populate with \code{NA} or \code{"empty"} to populate with an empty string (\code{""}).} \item{recurse}{if \code{TRUE}, will apply \code{var_label()} on packed columns (see \code{\link[tidyr:pack]{tidyr::pack()}}) to return the variable labels of each sub-column; otherwise, the label of the group of columns will be returned.} \item{value}{a character string or \code{NULL} to remove the label For data frames, with \code{var_label()}, it could also be a named list or a character vector of same length as the number of columns in \code{x}.} \item{.data}{a data frame or a vector} \item{.labels}{variable labels to be applied to the data.frame, using the same syntax as \code{value} in \code{var_label(df) <- value}.} \item{.strict}{should an error be returned if some labels doesn't correspond to a column of \code{x}?} } \value{ \code{set_variable_labels()} will return an updated copy of \code{.data}. } \description{ Get / Set a variable label } \details{ \code{get_variable_labels()} is identical to \code{var_label()}. For data frames, if you are using \verb{var_label()<-} and if \code{value} is a named list, only elements whose name will match a column of the data frame will be taken into account. If \code{value} is a character vector, labels should be in the same order as the columns of the data.frame. If you are using \verb{label_attribute()<-} or \code{set_label_attribute()} on a data frame, the label attribute will be attached to the data frame itself, not to a column of the data frame. If you are using packed columns (see \code{\link[tidyr:pack]{tidyr::pack()}}), please read the dedicated vignette. } \note{ \code{set_variable_labels()} could be used with \pkg{dplyr} syntax. } \examples{ var_label(iris$Sepal.Length) var_label(iris$Sepal.Length) <- "Length of the sepal" \dontrun{ View(iris) } # To remove a variable label var_label(iris$Sepal.Length) <- NULL # To change several variable labels at once var_label(iris) <- c( "sepal length", "sepal width", "petal length", "petal width", "species" ) var_label(iris) var_label(iris) <- list( Petal.Width = "width of the petal", Petal.Length = "length of the petal", Sepal.Width = NULL, Sepal.Length = NULL ) var_label(iris) var_label(iris, null_action = "fill") var_label(iris, null_action = "skip") var_label(iris, unlist = TRUE) # if (require(dplyr)) { # adding some variable labels df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) \%>\% set_variable_labels(s1 = "Sex", s2 = "Yes or No?") var_label(df) # removing a variable label df <- df \%>\% set_variable_labels(s2 = NULL) var_label(df$s2) # Set labels from dictionary, e.g. as read from external file # One description is missing, one has no match description <- tibble( name = c( "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Something" ), label = c( "Sepal length", "Sepal width", "Petal length", "Petal width", "something" ) ) var_labels <- stats::setNames(as.list(description$label), description$name) iris_labelled <- iris \%>\% set_variable_labels(.labels = var_labels, .strict = FALSE) var_label(iris_labelled) # defining variable labels derived from variable names if (require(snakecase)) { iris <- iris \%>\% set_variable_labels(.labels = to_sentence_case(names(iris))) var_label(iris) } # example with a vector v <- 1:5 v <- v \%>\% set_variable_labels("a variable label") v v \%>\% set_variable_labels(NULL) } } labelled/man/test_datasets.Rd0000644000176200001440000000137114357761455015752 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{x_haven_2.0} \alias{x_haven_2.0} \alias{x_spss_haven_2.0} \alias{spss_file} \alias{dta_file} \title{Datasets for testing} \format{ An object of class \code{haven_labelled} of length 6. An object of class \code{haven_labelled_spss} (inherits from \code{haven_labelled}) of length 10. An object of class \code{list} of length 13. An object of class \code{data.frame} with 47 rows and 6 columns. } \usage{ x_haven_2.0 x_spss_haven_2.0 spss_file dta_file } \description{ These datasets are used to test compatibility with foreign (spss_foreign), or haven_2.0 (x_haven_2.0, x_spss_haven_2.0) packages } \keyword{datasets} labelled/man/to_labelled.Rd0000644000176200001440000001036714737244525015352 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_labelled.R \name{to_labelled} \alias{to_labelled} \alias{to_labelled.data.frame} \alias{to_labelled.list} \alias{to_labelled.data.set} \alias{to_labelled.importer} \alias{foreign_to_labelled} \alias{memisc_to_labelled} \alias{to_labelled.factor} \title{Convert to labelled data} \usage{ to_labelled(x, ...) \method{to_labelled}{data.frame}(x, ...) \method{to_labelled}{list}(x, ...) \method{to_labelled}{data.set}(x, ...) \method{to_labelled}{importer}(x, ...) foreign_to_labelled(x) memisc_to_labelled(x) \method{to_labelled}{factor}(x, labels = NULL, .quiet = FALSE, ...) } \arguments{ \item{x}{Factor or dataset to convert to labelled data frame} \item{...}{Not used} \item{labels}{When converting a factor only: an optional named vector indicating how factor levels should be coded. If a factor level is not found in \code{labels}, it will be converted to \code{NA}.} \item{.quiet}{do not display warnings for prefixed factors with duplicated codes} } \value{ A tbl data frame or a labelled vector. } \description{ Convert a factor or data imported with \pkg{foreign} or \pkg{memisc} to labelled data. } \details{ \code{to_labelled()} is a general wrapper calling the appropriate sub-functions. \code{memisc_to_labelled()} converts a \code{memisc::data.set()}]` object created with \pkg{memisc} package to a labelled data frame. \code{foreign_to_labelled()} converts data imported with \code{\link[foreign:read.spss]{foreign::read.spss()}} or \code{\link[foreign:read.dta]{foreign::read.dta()}} from \pkg{foreign} package to a labelled data frame, i.e. using \code{\link[haven:labelled]{haven::labelled()}}. Factors will not be converted. Therefore, you should use \code{use.value.labels = FALSE} when importing with \code{\link[foreign:read.spss]{foreign::read.spss()}} or \code{convert.factors = FALSE} when importing with \code{\link[foreign:read.dta]{foreign::read.dta()}}. To convert correctly defined missing values imported with \code{\link[foreign:read.spss]{foreign::read.spss()}}, you should have used \code{to.data.frame = FALSE} and \code{use.missings = FALSE}. If you used the option \code{to.data.frame = TRUE}, meta data describing missing values will not be attached to the import. If you used \code{use.missings = TRUE}, missing values would have been converted to \code{NA}. So far, missing values defined in \strong{Stata} are always imported as \code{NA} by \code{\link[foreign:read.dta]{foreign::read.dta()}} and could not be retrieved by \code{foreign_to_labelled()}. If you convert a labelled vector into a factor with prefix, i.e. by using \code{\link[=to_factor]{to_factor(levels = "prefixed")}}, \code{to_labelled.factor()} is able to reconvert it to a labelled vector with same values and labels. } \examples{ \dontrun{ # from foreign library(foreign) sav <- system.file("files", "electric.sav", package = "foreign") df <- to_labelled(read.spss( sav, to.data.frame = FALSE, use.value.labels = FALSE, use.missings = FALSE )) # from memisc library(memisc) nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") nes1948 <- spss.portable.file(nes1948.por) ds <- as.data.set(nes1948) df <- to_labelled(ds) } # Converting factors to labelled vectors f <- factor( c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") ) to_labelled(f) to_labelled(f, c("yes" = 1, "no" = 2, "don't know" = 9)) to_labelled(f, c("yes" = 1, "no" = 2)) to_labelled(f, c("yes" = "Y", "no" = "N", "don't know" = "DK")) s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F")) labels <- val_labels(s1) f1 <- to_factor(s1) f1 to_labelled(f1) identical(s1, to_labelled(f1)) to_labelled(f1, labels) identical(s1, to_labelled(f1, labels)) l <- labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) f <- to_factor(l, levels = "p") f to_labelled(f) identical(to_labelled(f), l) } \seealso{ \code{\link[haven:labelled]{haven::labelled()}}, \code{\link[foreign:read.spss]{foreign::read.spss()}}, \code{\link[foreign:read.dta]{foreign::read.dta()}}, \code{memisc::data.set()}, \code{memisc::importer}, \code{\link[=to_factor]{to_factor()}}. } labelled/man/unique_tagged_na.Rd0000644000176200001440000000355014357761455016403 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tagged_na.R \name{unique_tagged_na} \alias{unique_tagged_na} \alias{duplicated_tagged_na} \alias{order_tagged_na} \alias{sort_tagged_na} \title{Unique elements, duplicated, ordering and sorting with tagged NAs} \usage{ unique_tagged_na(x, fromLast = FALSE) duplicated_tagged_na(x, fromLast = FALSE) order_tagged_na( x, na.last = TRUE, decreasing = FALSE, method = c("auto", "shell", "radix"), na_decreasing = decreasing, untagged_na_last = TRUE ) sort_tagged_na( x, decreasing = FALSE, na.last = TRUE, na_decreasing = decreasing, untagged_na_last = TRUE ) } \arguments{ \item{x}{a vector} \item{fromLast}{logical indicating if duplication should be considered from the last} \item{na.last}{if \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first} \item{decreasing}{should the sort order be increasing or decreasing?} \item{method}{the method to be used, see \code{\link[base:order]{base::order()}}} \item{na_decreasing}{should the sort order for tagged NAs value be} \item{untagged_na_last}{should untagged \code{NA}s be sorted after tagged \code{NA}s? increasing or decreasing?} } \description{ These adaptations of \code{\link[base:unique]{base::unique()}}, \code{\link[base:duplicated]{base::duplicated()}}, \code{\link[base:order]{base::order()}} and \code{\link[base:sort]{base::sort()}} treats tagged NAs as distinct values. } \examples{ x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x \%>\% print_tagged_na() unique(x) \%>\% print_tagged_na() unique_tagged_na(x) \%>\% print_tagged_na() duplicated(x) duplicated_tagged_na(x) order(x) order_tagged_na(x) sort(x, na.last = TRUE) \%>\% print_tagged_na() sort_tagged_na(x) \%>\% print_tagged_na() } labelled/man/val_labels_to_na.Rd0000644000176200001440000000075514357761455016374 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_na.R \name{val_labels_to_na} \alias{val_labels_to_na} \title{Recode value labels to NA} \usage{ val_labels_to_na(x) } \arguments{ \item{x}{Object to recode.} } \description{ For labelled variables, values with a label will be recoded to \code{NA}. } \examples{ v <- labelled(c(1, 2, 9, 1, 9), c(dk = 9)) val_labels_to_na(v) } \seealso{ \code{\link[haven:zap_labels]{haven::zap_labels()}} } labelled/man/is_prefixed.Rd0000644000176200001440000000043714357761455015406 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is_prefixed.R \name{is_prefixed} \alias{is_prefixed} \title{Check if a factor is prefixed} \usage{ is_prefixed(x) } \arguments{ \item{x}{a factor} } \description{ Check if a factor is prefixed } labelled/DESCRIPTION0000644000176200001440000000305114737436072013540 0ustar liggesusersPackage: labelled Type: Package Title: Manipulating Labelled Data Version: 2.14.0 Maintainer: Joseph Larmarange Authors@R: c( person("Joseph", "Larmarange", email = "joseph@larmarange.net", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7097-700X")), person("Daniel", "Ludecke", role = "ctb"), person("Hadley", "Wickham", role = "ctb"), person("Michal", "Bojanowski", role = "ctb"), person("François", "Briatte", role = "ctb") ) Description: Work with labelled data imported from 'SPSS' or 'Stata' with 'haven' or 'foreign'. This package provides useful functions to deal with "haven_labelled" and "haven_labelled_spss" classes introduced by 'haven' package. License: GPL (>= 3) Encoding: UTF-8 Depends: R (>= 3.2) Imports: haven (>= 2.4.1), cli, dplyr (>= 1.1.0), lifecycle, rlang (>= 1.1.0), vctrs, stringr, tidyr, tidyselect Suggests: testthat (>= 3.2.0), knitr, rmarkdown, questionr, snakecase, spelling Enhances: memisc URL: https://larmarange.github.io/labelled/, https://github.com/larmarange/labelled BugReports: https://github.com/larmarange/labelled/issues VignetteBuilder: knitr LazyData: true RoxygenNote: 7.3.2 Language: en-US Config/testthat/edition: 3 Config/Needs/check: memisc NeedsCompilation: no Packaged: 2025-01-08 08:12:06 UTC; josep Author: Joseph Larmarange [aut, cre] (), Daniel Ludecke [ctb], Hadley Wickham [ctb], Michal Bojanowski [ctb], François Briatte [ctb] Repository: CRAN Date/Publication: 2025-01-08 08:50:02 UTC