labelled/0000755000176200001440000000000014444603422012021 5ustar liggesuserslabelled/NAMESPACE0000644000176200001440000001324414444527456013260 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("na_range<-",data.frame) S3method("na_range<-",default) S3method("na_range<-",factor) S3method("na_range<-",haven_labelled) S3method("na_values<-",data.frame) S3method("na_values<-",default) S3method("na_values<-",factor) S3method("na_values<-",haven_labelled) S3method("val_label<-",character) S3method("val_label<-",data.frame) S3method("val_label<-",default) S3method("val_label<-",factor) S3method("val_label<-",haven_labelled) S3method("val_label<-",numeric) S3method("val_labels<-",character) S3method("val_labels<-",data.frame) S3method("val_labels<-",default) S3method("val_labels<-",factor) S3method("val_labels<-",haven_labelled) S3method("val_labels<-",haven_labelled_spss) S3method("val_labels<-",numeric) S3method("var_label<-",data.frame) S3method("var_label<-",default) S3method(copy_labels,data.frame) S3method(copy_labels,default) S3method(copy_labels,haven_labelled) S3method(drop_unused_value_labels,data.frame) S3method(drop_unused_value_labels,default) S3method(drop_unused_value_labels,haven_labelled) S3method(na_range,data.frame) S3method(na_range,default) S3method(na_range,haven_labelled_spss) S3method(na_values,data.frame) S3method(na_values,default) S3method(na_values,haven_labelled_spss) S3method(names_prefixed_by_values,default) S3method(names_prefixed_by_values,list) S3method(nolabel_to_na,data.frame) S3method(nolabel_to_na,default) S3method(nolabel_to_na,haven_labelled) S3method(print,look_for) S3method(recode,haven_labelled) S3method(remove_attributes,data.frame) S3method(remove_attributes,default) S3method(remove_labels,data.frame) S3method(remove_labels,default) S3method(remove_labels,haven_labelled_spss) S3method(remove_user_na,data.frame) S3method(remove_user_na,default) S3method(remove_user_na,haven_labelled_spss) S3method(remove_val_labels,data.frame) S3method(remove_val_labels,default) S3method(remove_var_label,data.frame) S3method(remove_var_label,default) S3method(sort_val_labels,data.frame) S3method(sort_val_labels,default) S3method(sort_val_labels,haven_labelled) S3method(tagged_na_to_regular_na,data.frame) S3method(tagged_na_to_regular_na,default) S3method(tagged_na_to_regular_na,double) S3method(tagged_na_to_user_na,data.frame) S3method(tagged_na_to_user_na,default) S3method(tagged_na_to_user_na,double) S3method(to_character,data.frame) S3method(to_character,default) S3method(to_character,double) S3method(to_character,haven_labelled) S3method(to_factor,data.frame) S3method(to_factor,default) S3method(to_factor,factor) S3method(to_factor,haven_labelled) S3method(to_labelled,data.frame) S3method(to_labelled,data.set) S3method(to_labelled,factor) S3method(to_labelled,importer) S3method(to_labelled,list) S3method(update_labelled,data.frame) S3method(update_labelled,default) S3method(update_labelled,haven_labelled) S3method(update_labelled,haven_labelled_spss) S3method(update_labelled,labelled) S3method(user_na_to_na,data.frame) S3method(user_na_to_na,default) S3method(user_na_to_na,haven_labelled_spss) S3method(user_na_to_tagged_na,data.frame) S3method(user_na_to_tagged_na,default) S3method(user_na_to_tagged_na,haven_labelled_spss) S3method(val_label,data.frame) S3method(val_label,default) S3method(val_label,haven_labelled) S3method(val_labels,data.frame) S3method(val_labels,default) S3method(val_labels,haven_labelled) S3method(val_labels_to_na,data.frame) S3method(val_labels_to_na,default) S3method(val_labels_to_na,haven_labelled) S3method(var_label,data.frame) S3method(var_label,default) export("%>%") export("label_attribute<-") export("na_range<-") export("na_values<-") export("val_label<-") export("val_labels<-") export("var_label<-") export(add_value_labels) export(convert_list_columns_to_character) export(copy_labels) export(copy_labels_from) export(drop_unused_value_labels) export(duplicated_tagged_na) export(foreign_to_labelled) export(format_tagged_na) export(generate_dictionary) export(get_label_attribute) export(get_na_range) export(get_na_values) export(get_value_labels) export(get_variable_labels) export(is.labelled) export(is_prefixed) export(is_regular_na) export(is_tagged_na) export(is_user_na) export(label_attribute) export(labelled) export(labelled_spss) export(look_for) export(look_for_and_select) export(lookfor) export(lookfor_to_long_format) export(memisc_to_labelled) export(na_range) export(na_tag) export(na_values) export(names_prefixed_by_values) export(nolabel_to_na) export(order_tagged_na) export(print_labels) export(print_tagged_na) export(recode_if) export(remove_attributes) export(remove_labels) export(remove_user_na) export(remove_val_labels) export(remove_value_labels) export(remove_var_label) export(set_label_attribute) export(set_na_range) export(set_na_values) export(set_value_labels) export(set_variable_labels) export(sort_tagged_na) export(sort_val_labels) export(tagged_na) export(tagged_na_to_regular_na) export(tagged_na_to_user_na) export(to_character) export(to_factor) export(to_labelled) export(unique_tagged_na) export(unlabelled) export(update_labelled) export(user_na_to_na) export(user_na_to_regular_na) export(user_na_to_tagged_na) export(val_label) export(val_labels) export(val_labels_to_na) export(var_label) importFrom(dplyr,.data) importFrom(dplyr,`%>%`) importFrom(dplyr,recode) importFrom(haven,format_tagged_na) importFrom(haven,is.labelled) importFrom(haven,is_tagged_na) importFrom(haven,labelled) importFrom(haven,labelled_spss) importFrom(haven,na_tag) importFrom(haven,print_labels) importFrom(haven,print_tagged_na) importFrom(haven,tagged_na) importFrom(lifecycle,deprecate_soft) labelled/README.md0000644000176200001440000000514414411236022013274 0ustar liggesusers# labelled [![Project Status: Active - The project has reached a stable, usable state and is being actively developed.](https://www.repostatus.org/badges/0.1.0/active.svg)](https://www.repostatus.org/#active) [![Lifecycle: stable](https://img.shields.io/badge/lifecycle-stable-brightgreen.svg)](https://lifecycle.r-lib.org/articles/stages.html#stable) [![R-CMD-check](https://github.com/larmarange/labelled/actions/workflows/R-CMD-check.yaml/badge.svg)](https://github.com/larmarange/labelled/actions/workflows/R-CMD-check.yaml) [![CRAN\_Status\_Badge](https://www.r-pkg.org/badges/version/labelled)](https://cran.r-project.org/package=labelled) [![Downloads](https://cranlogs.r-pkg.org/badges/labelled)](https://cran.r-project.org/package=labelled) [![DOI](https://www.zenodo.org/badge/38772078.svg)](https://www.zenodo.org/badge/latestdoi/38772078) [![Codecov test coverage](https://codecov.io/gh/larmarange/labelled/branch/main/graph/badge.svg)](https://app.codecov.io/gh/larmarange/labelled?branch=main) This package is built on the new classes `haven_labelled` and `haven_labelled_spss` introduced by `haven` package to handle labelled variables imported from SPSS, Stata and SAS. The `labelled` package propose several functions to manipulate such vectors and their metadata: variable labels, value labels and user-defined missing values. ## Installation & Documentation To install **stable version**: ``` install.packages("labelled") ``` Documentation of stable version: To install **development version**: ``` remotes::install_github("larmarange/labelled") ``` Documentation of development version: ## Introduction Read the vignette at ## Cheatsheet [![labelled cheatsheet](https://github.com/larmarange/labelled/raw/main/cheatsheet/labelled_cheatsheet.png)](https://github.com/larmarange/labelled/raw/main/cheatsheet/labelled_cheatsheet.pdf) ## Some general guidelines 1. Functions are intended to support `labelled` metadata structures only. However, `to_labelled()` method allows to convert metadata from **foreign** and **memisc** packages. 2. Functions should, by default, modify metadata only (i.e. classes and attributes), except if explicitly expressed by the user. labelled/data/0000755000176200001440000000000014357761455012750 5ustar liggesuserslabelled/data/x_spss_haven_2.0.rda0000644000176200001440000000034114357761455016515 0ustar liggesusersBZh91AY&SYHPPԀ0@`0`SAL 6i&HLh@ 0$T&i 8)&' $  z@hpj! v0CH `OT?\t\ZEvbErJ+>ٝL  ъg c<7>Fe4^R# 0 j,?H]B@~Cllabelled/data/x_haven_2.0.rda0000644000176200001440000000024414357761455015447 0ustar liggesusersBZh91AY&SY﹕b@P@O@ ShhPHG 4Bʰh~4M$Ma `4hP0"Tⓕ>X̻] |՜h$4 ||aa]BBgTlabelled/data/dta_file.rda0000644000176200001440000000235314357761455015212 0ustar liggesusersBZh91AY&SYHk}B0MbYq !P-boM"i m@z4i=F'y'm  zFj2DڃC#AF'MILjhbhm@@Ѡ@"$P52=M4"i`#d4hh h FAL#!aL#!LA2`e14Qi4hh4hi 4ACLA"V?UFKL%_" ۸P!Hd@ЋPHb꨻pl\mIJ A$&8"gZKPPZmȋjWHp&g}5:B$ ATd& 8$(6/ . :78j @VR ؐV! @7(PmK X\% K -׆QfV(10xEA1.lQ.@_ Fqb& *ՠ^SO*bw7͔+{|&PPs0!9sx{  Q,^d(؅Pi,'!3{~m{v1s kZq RMc^RBa{hiɺu}31q])HnʒhnI58QDN n~w%6;QnCM2!9LrE1[{yVJimIxsFv^2 Y4i Gi_3aUh*Q YսY"v}[uj%^H} "PtRz:SeE AEYKׄ#VI@.堸`PP:E<׍>&/!&sб>F5+$ӏN{#$o;] LLiA&ĠM(.@K,u@E &$ʺ@0@4/L:AA QQM#L"/Cațݑ|Gs .cb:lm;@2l 'v|lMbE͂e`JZM68;[ns wNI(8N/Z gTXyXJ!b),CnZL} E+jIy7ޜ37onZfx9 DdA5 9} .ΏrD%/O:etLh.P*"*Lh!10"i"&](*WS^#vn/3߼6G )z#hEG w UFY;ϙ=~Wzj{s <(r zm/^*̜=+ Z zNfv nhTw HwOzTaL9Ht2s:3O]].].zwכz')cܓ0`;/hlD// ^\0FF-cږFxq!'5㳍ݤrHzy:;4NHG.:3 g 4UMie/u4FR,mwnSE@i2Zqb=}nƯQX-GcWʷi[ɶ۸גW8N݊41n-],WcW;:?GvոM%ccv"Ē] IH]bb*9 5]##E(H (,&Lf2F A36A32Lc.8g_q뮸ƺmM].A),DCjĠlH`fMHdXfDfe`Ҳ,k7kJQt7s떹5CRED%b &-&" ioUcF4j#QbhLIbTICn1 X+--+FFj\mFƴ 1{Hta<xX5c׿Akɔ"0b A߅l!t3AYd `]%ηc9WR'.t7v^@g(8! gOz`* @6gN[J#e?۽!Ld7c7^oWa%΍5%Od1 1IK?w7tq1O1L0̈) fEg6fFڟ,3?)Ȇ 2\J*9gCw610`j_NwB KW>' + ]p@DcΪS.p vlabelled/man/0000755000176200001440000000000014444527456012610 5ustar liggesuserslabelled/man/names_prefixed_by_values.Rd0000644000176200001440000000126414357761455020146 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/val_labels.R \name{names_prefixed_by_values} \alias{names_prefixed_by_values} \title{Turn a named vector into a vector of names prefixed by values} \usage{ names_prefixed_by_values(x) } \arguments{ \item{x}{vector to be prefixed} } \description{ Turn a named vector into a vector of names prefixed by values } \examples{ df <- dplyr::tibble( c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")), c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2)) ) val_labels(df$c1) val_labels(df$c1) \%>\% names_prefixed_by_values() val_labels(df) val_labels(df) \%>\% names_prefixed_by_values() } labelled/man/recode_if.Rd0000644000176200001440000000234214357761455015021 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode_if.R \name{recode_if} \alias{recode_if} \title{Recode some values based on condition} \usage{ recode_if(x, condition, true) } \arguments{ \item{x}{vector to be recoded} \item{condition}{logical vector of same length as \code{x}} \item{true}{values to use for \code{TRUE} values of \code{condition}. It must be either the same length as \code{x}, or length 1.} } \value{ Returns \code{x} with values replaced by \code{true} when \code{condition} is \code{TRUE} and unchanged when \code{condition} is \code{FALSE} or \code{NA}. Variable and value labels are preserved unchanged. } \description{ Recode some values based on condition } \examples{ v <- labelled(c(1,2,2,9), c(yes = 1, no = 2)) v \%>\% recode_if(v == 9, NA) if (require(dplyr)) { df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 2, 1)) \%>\% set_value_labels( s1 = c(Male = "M", Female = "F"), s2 = c(A = 1, B = 2) ) \%>\% set_variable_labels(s1 = "Gender", s2 = "Group") df <- df \%>\% mutate( s3 = s2 \%>\% recode_if(s1 == "F", 2), s4 = s2 \%>\% recode_if(s1 == "M", s2 + 10) ) df df \%>\% look_for() } } labelled/man/tagged_na_to_user_na.Rd0000644000176200001440000000246514357761455017237 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tagged_na.R \name{tagged_na_to_user_na} \alias{tagged_na_to_user_na} \alias{tagged_na_to_regular_na} \title{Convert tagged NAs into user NAs} \usage{ tagged_na_to_user_na(x, user_na_start = NULL) tagged_na_to_regular_na(x) } \arguments{ \item{x}{a vector or a data frame} \item{user_na_start}{minimum value of the new user na, if \code{NULL}, computed automatically (maximum of observed values + 1)} } \description{ \code{\link[=tagged_na_to_user_na]{tagged_na_to_user_na()}} is the opposite of \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} and convert tagged \code{NA} into user defined missing values (see \code{\link[=labelled_spss]{labelled_spss()}}). } \details{ \code{\link[=tagged_na_to_regular_na]{tagged_na_to_regular_na()}} converts tagged NAs into regular NAs. } \examples{ x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) x print_tagged_na(x) tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) y <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d")) val_labels(y) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) y tagged_na_to_user_na(y, user_na_start = 8) tagged_na_to_regular_na(y) tagged_na_to_regular_na(y) \%>\% is_tagged_na() } labelled/man/test_datasets.Rd0000644000176200001440000000137114357761455015752 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/data.R \docType{data} \name{x_haven_2.0} \alias{x_haven_2.0} \alias{x_spss_haven_2.0} \alias{spss_file} \alias{dta_file} \title{Datasets for testing} \format{ An object of class \code{haven_labelled} of length 6. An object of class \code{haven_labelled_spss} (inherits from \code{haven_labelled}) of length 10. An object of class \code{list} of length 13. An object of class \code{data.frame} with 47 rows and 6 columns. } \usage{ x_haven_2.0 x_spss_haven_2.0 spss_file dta_file } \description{ These datasets are used to test compatibility with foreign (spss_foreign), or haven_2.0 (x_haven_2.0, x_spss_haven_2.0) packages } \keyword{datasets} labelled/man/remove_labels.Rd0000644000176200001440000000362514357761455015726 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_labels.R \name{remove_labels} \alias{remove_labels} \alias{remove_var_label} \alias{remove_val_labels} \alias{remove_user_na} \title{Remove variable label, value labels and user defined missing values} \usage{ remove_labels( x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE ) remove_var_label(x) remove_val_labels(x) remove_user_na(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE) } \arguments{ \item{x}{A vector or a data frame.} \item{user_na_to_na}{Convert user defined missing values into \code{NA}?} \item{keep_var_label}{Keep variable label?} \item{user_na_to_tagged_na}{Convert user defined missing values into tagged \code{NA}? It could be applied only to numeric vectors. Note that integer labelled vectors will be converted to double labelled vectors.} } \description{ Use \code{remove_var_label()} to remove variable label, \code{remove_val_labels()} to remove value labels, \code{remove_user_na()} to remove user defined missing values (\emph{na_values} and \emph{na_range}) and \code{remove_labels()} to remove all. } \details{ Be careful with \code{remove_user_na()} and \code{remove_labels()}, user defined missing values will not be automatically converted to \code{NA}, except if you specify \code{user_na_to_na = TRUE}. \code{user_na_to_na(x)} is an equivalent of \code{remove_user_na(x, user_na_to_na = TRUE)}. If you prefer to convert variables with value labels into factors, use \code{\link[=to_factor]{to_factor()}} or use \code{\link[=unlabelled]{unlabelled()}}. } \examples{ x <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) var_label(x) <- "A variable" x remove_labels(x) remove_labels(x, user_na_to_na = TRUE) remove_user_na(x, user_na_to_na = TRUE) remove_user_na(x, user_na_to_tagged_na = TRUE) } labelled/man/to_factor.Rd0000644000176200001440000001155414357761455015067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_factor.R \name{to_factor} \alias{to_factor} \alias{to_factor.haven_labelled} \alias{to_factor.data.frame} \alias{unlabelled} \title{Convert input to a factor.} \usage{ to_factor(x, ...) \method{to_factor}{haven_labelled}( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, drop_unused_labels = FALSE, user_na_to_na = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ... ) \method{to_factor}{data.frame}( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, labelled_only = TRUE, drop_unused_labels = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ... ) unlabelled(x, ...) } \arguments{ \item{x}{Object to coerce to a factor.} \item{...}{Other arguments passed down to method.} \item{levels}{What should be used for the factor levels: the labels, the values or labels prefixed with values?} \item{ordered}{\code{TRUE} for ordinal factors, \code{FALSE} (default) for nominal factors.} \item{nolabel_to_na}{Should values with no label be converted to \code{NA}?} \item{sort_levels}{How the factor levels should be sorted? (see Details)} \item{decreasing}{Should levels be sorted in decreasing order?} \item{drop_unused_labels}{Should unused value labels be dropped? (applied only if \code{strict = FALSE})} \item{user_na_to_na}{Convert user defined missing values into \code{NA}?} \item{strict}{Convert to factor only if all values have a defined label?} \item{unclass}{If not converted to a factor (when \code{strict = TRUE}), convert to a character or a numeric factor by applying \code{\link[base:class]{base::unclass()}}?} \item{explicit_tagged_na}{Should tagged NA (cf. \code{\link[haven:tagged_na]{haven::tagged_na()}}) be kept as explicit factor levels?} \item{labelled_only}{for a data.frame, convert only labelled variables to factors?} } \description{ The base function \code{\link[base:factor]{base::as.factor()}} is not a generic, but this variant is. By default, \code{to_factor()} is a wrapper for \code{\link[base:factor]{base::as.factor()}}. Please note that \code{to_factor()} differs slightly from \code{\link[haven:as_factor]{haven::as_factor()}} method provided by \pkg{haven} package. \code{unlabelled(x)} is a shortcut for \code{to_factor(x, strict = TRUE, unclass = TRUE, labelled_only = TRUE)}. } \details{ If some values doesn't have a label, automatic labels will be created, except if \code{nolabel_to_na} is \code{TRUE}. If \code{sort_levels == 'values'}, the levels will be sorted according to the values of \code{x}. If \code{sort_levels == 'labels'}, the levels will be sorted according to labels' names. If \code{sort_levels == 'none'}, the levels will be in the order the value labels are defined in \code{x}. If some labels are automatically created, they will be added at the end. If \code{sort_levels == 'auto'}, \code{sort_levels == 'none'} will be used, except if some values doesn't have a defined label. In such case, \code{sort_levels == 'values'} will be applied. When applied to a data.frame, only labelled vectors are converted by default to a factor. Use \code{labelled_only = FALSE} to convert all variables to factors. \code{unlabelled()} is a shortcut for quickly removing value labels of a vector or of a data.frame. If all observed values have a value label, then the vector will be converted into a factor. Otherwise, the vector will be unclassed. If you want to remove value labels in all cases, use \code{\link[=remove_val_labels]{remove_val_labels()}}. } \examples{ v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) to_factor(v) to_factor(v, nolabel_to_na = TRUE) to_factor(v, 'p') to_factor(v, sort_levels = 'v') to_factor(v, sort_levels = 'n') to_factor(v, sort_levels = 'l') x <- labelled(c('H', 'M', 'H', 'L'), c(low = 'L', medium = 'M', high = 'H')) to_factor(x, ordered = TRUE) # Strict conversion v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) to_factor(v) to_factor(v, strict = TRUE) # Not converted because 3 does not have a label to_factor(v, strict = TRUE, unclass = TRUE) df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled( c("a", "a", "b", "c"), labels = c(No = "a", Maybe = "b", Yes = "c") ), d = 1:4, e = factor(c("item1", "item2", "item1", "item2")), f = c("itemA", "itemA", "itemB", "itemB"), stringsAsFactors = FALSE ) if (require(dplyr)) { glimpse(df) glimpse(unlabelled(df)) } } labelled/man/sort_val_labels.Rd0000644000176200001440000000125214357761455016254 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/val_labels.R \name{sort_val_labels} \alias{sort_val_labels} \title{Sort value labels} \usage{ sort_val_labels(x, according_to = c("values", "labels"), decreasing = FALSE) } \arguments{ \item{x}{A labelled vector or a data.frame} \item{according_to}{According to values or to labels?} \item{decreasing}{In decreasing order?} } \description{ Sort value labels according to values or to labels } \examples{ v <- labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3)) v sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) sort_val_labels(v, 'l') sort_val_labels(v, 'l', TRUE) } labelled/man/remove_attributes.Rd0000644000176200001440000000130414357761455016642 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_attributes.R \name{remove_attributes} \alias{remove_attributes} \title{Remove attributes} \usage{ remove_attributes(x, attributes) } \arguments{ \item{x}{an object} \item{attributes}{a character vector indicating attributes to remove} } \description{ This function removes specified attributes. When applied to a data.frame, it will also remove recursively the specified attributes to each column of the data.frame. } \examples{ \dontrun{ library(haven) path <- system.file("examples", "iris.sav", package = "haven") d <- read_sav(path) str(d) d <- remove_attributes(d, "format.spss") str(d)} } labelled/man/na_values.Rd0000644000176200001440000001140614444527456015056 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/na_values.R \name{na_values} \alias{na_values} \alias{na_values<-} \alias{na_range} \alias{na_range<-} \alias{get_na_values} \alias{get_na_range} \alias{set_na_values} \alias{set_na_range} \alias{is_user_na} \alias{is_regular_na} \alias{user_na_to_na} \alias{user_na_to_regular_na} \alias{user_na_to_tagged_na} \title{Get / Set SPSS missing values} \usage{ na_values(x) na_values(x) <- value na_range(x) na_range(x) <- value get_na_values(x) get_na_range(x) set_na_values(.data, ..., .values = NA, .strict = TRUE) set_na_range(.data, ..., .values = NA, .strict = TRUE) is_user_na(x) is_regular_na(x) user_na_to_na(x) user_na_to_regular_na(x) user_na_to_tagged_na(x) } \arguments{ \item{x}{A vector (or a data frame).} \item{value}{A vector of values that should also be considered as missing (for \code{na_values}) or a numeric vector of length two giving the (inclusive) extents of the range (for \code{na_values}, use \code{-Inf} and \code{Inf} if you want the range to be open ended).} \item{.data}{a data frame or a vector} \item{...}{name-value pairs of missing values (see examples)} \item{.values}{missing values to be applied to the data.frame, using the same syntax as \code{value} in \code{na_values(df) <- value} or \code{na_range(df) <- value}.} \item{.strict}{should an error be returned if some labels doesn't correspond to a column of \code{x}?} } \value{ \code{na_values()} will return a vector of values that should also be considered as missing. \code{na_range()} will return a numeric vector of length two giving the (inclusive) extents of the range. \code{set_na_values()} and \code{set_na_range()} will return an updated copy of \code{.data}. } \description{ Get / Set SPSS missing values } \details{ See \code{\link[haven:labelled_spss]{haven::labelled_spss()}} for a presentation of SPSS's user defined missing values. Note that \code{\link[base:NA]{base::is.na()}} will return \code{TRUE} for user defined missing values. It will also return \code{TRUE} for regular \code{NA} values. If you want to test if a specific value is a user NA but not a regular \code{NA}, use \code{is_user_na()}. If you want to test if a value is a regular \code{NA} but not a user NA, not a tagged NA, use \code{is_regular_na()}. You can use \code{\link[=user_na_to_na]{user_na_to_na()}} to convert user defined missing values to regular \code{NA}. Note that any value label attached to a user defined missing value will be lost. \code{\link[=user_na_to_regular_na]{user_na_to_regular_na()}} is a synonym of \code{\link[=user_na_to_na]{user_na_to_na()}}. The method \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} will convert user defined missing values into \code{\link[haven:tagged_na]{haven::tagged_na()}}, preserving value labels. Please note that \code{\link[haven:tagged_na]{haven::tagged_na()}} are defined only for double vectors. Therefore, integer \code{haven_labelled_spss} vectors will be converted into double \code{haven_labelled} vectors; and \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} cannot be applied to a character \code{haven_labelled_spss} vector. \code{\link[=tagged_na_to_user_na]{tagged_na_to_user_na()}} is the opposite of \code{\link[=user_na_to_tagged_na]{user_na_to_tagged_na()}} and convert tagged \code{NA} into user defined missing values. } \note{ \code{get_na_values()} is identical to \code{na_values()} and \code{get_na_range()} to \code{na_range()}. \code{set_na_values()} and \code{set_na_range()} could be used with \pkg{dplyr} syntax. } \examples{ v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 na_values(v) v is.na(v) # TRUE for the 6th and 10th values is_user_na(v) # TRUE only for the 6th value user_na_to_na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v user_na_to_na(v) user_na_to_tagged_na(v) # it is not recommended to mix user NAs and tagged NAs x <- c(NA, 9, tagged_na("a")) na_values(x) <- 9 x is.na(x) is_user_na(x) is_tagged_na(x) is_regular_na(x) if (require(dplyr)) { # setting value label and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) \%>\% set_value_labels(s2 = c(yes = 1, no = 2)) \%>\% set_na_values(s2 = 9) na_values(df) # removing missing values df <- df \%>\% set_na_values(s2 = NULL) df$s2 # example with a vector v <- 1:10 v <- v \%>\% set_na_values(5, 6, 7) v v \%>\% set_na_range(8, 10) v \%>\% set_na_range(.values = c(9, 10)) v \%>\% set_na_values(NULL) } } \seealso{ \code{\link[haven:labelled_spss]{haven::labelled_spss()}}, \code{\link[=user_na_to_na]{user_na_to_na()}} } labelled/man/drop_unused_value_labels.Rd0000644000176200001440000000075614357761455020156 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/drop_unused_value_labels.R \name{drop_unused_value_labels} \alias{drop_unused_value_labels} \title{Drop unused value labels} \usage{ drop_unused_value_labels(x) } \arguments{ \item{x}{A vector or a data frame.} } \description{ Drop value labels associated to a value not present in the data. } \examples{ x <- labelled(c(1, 2, 2, 1), c(yes = 1, no = 2, maybe = 3)) x drop_unused_value_labels(x) } labelled/man/figures/0000755000176200001440000000000014357761455014256 5ustar liggesuserslabelled/man/figures/lifecycle-defunct.svg0000644000176200001440000000170414357761455020366 0ustar liggesuserslifecyclelifecycledefunctdefunct labelled/man/figures/lifecycle-maturing.svg0000644000176200001440000000170614357761455020566 0ustar liggesuserslifecyclelifecyclematuringmaturing labelled/man/figures/lifecycle-archived.svg0000644000176200001440000000170714357761455020526 0ustar liggesusers lifecyclelifecyclearchivedarchived labelled/man/figures/lifecycle-soft-deprecated.svg0000644000176200001440000000172614357761455022013 0ustar liggesuserslifecyclelifecyclesoft-deprecatedsoft-deprecated labelled/man/figures/lifecycle-questioning.svg0000644000176200001440000000171414357761455021304 0ustar liggesuserslifecyclelifecyclequestioningquestioning labelled/man/figures/labelled.svg0000644000176200001440000003237114357761455016551 0ustar liggesusers image/svg+xml RStudio_Hex 2016 v7 outlines RStudio_Hex 2016 v7 outlines labelled/man/figures/lifecycle-superseded.svg0000644000176200001440000000171314357761455021101 0ustar liggesusers lifecyclelifecyclesupersededsuperseded labelled/man/figures/lifecycle-stable.svg0000644000176200001440000000167414357761455020216 0ustar liggesuserslifecyclelifecyclestablestable labelled/man/figures/lifecycle-experimental.svg0000644000176200001440000000171614357761455021436 0ustar liggesuserslifecyclelifecycleexperimentalexperimental labelled/man/figures/labelled.png0000644000176200001440000002545014357761455016536 0ustar liggesusersPNG  IHDR6sBIT|d pHYsi> tEXtSoftwarewww.inkscape.org<"tEXtTitleRStudio_Hex 2016 v7 outlines}p} IDATxwxǿgfw%TH B "X.U@h P,WV:UObL4^B$lߙc%$;[|'3ov{}X" SgtYǎ-1Y iiYc,пv%G10g-gYpDb3ftDuS#sD @i- BQ uBEd.iBBaʍR3L5$S*-As 8}գZm<Cu'LYԘ6YJb1WĢ? % (16,%\\Z}mq1D BJt jJvIxY%D6)c(KzԶT8i8}5#8!_?Xд)Q'(Ro:IP| zg6fʓVeI%v(@U!jEAFpz-@%WTú@"EMgM@jjԄTacGPpj$A(;BЊ:!}0JR\'-)jBt2D+8bޖ`HĹ%Ywް'D3jV"%4RULm*j=HV]i-FЈo%'*@¤АPVQ =ޖY57:9.YRCP7s7{Ք^r[ daqΊuh%ZfާMRR&A1TzZ-rܒ}mWs*Eav%OJB)ǃ@*?Y8xr %Dx"OZ.)%UIʽﶶVsu32ǁҥ+g%Ay^z,l-%/3{ VZ=dq-:%DaPml5`{> A0Z^MgMeAp@/#4D 14 5 wJfONuѽC4Y9aX47zʪؾN^@)y8_C; ])fWDѳ (m$4Lva*x*LUޭZT 9.}:ah$Ꟃɱ;q }s+.JH$G_x 蚀=Q\.xOGl=)qQpss1lI۞'%}TrhATʥE܅. 3 钀){?U@EBn%x Lݰ ? BF0B S!f}tR#l®O:vj"@ U*`/OXS- %Q*qPB@~+ !?0/Wt腚]EԉzyzԶTśeK(lX4=:8?[Ps/Z)[c`In& QQm`Blӛ՛pڈ*=ʪ գJK%U/ro.hVO^+y;V]쬶$3( zQW]=x9~< D!~)?%c@ƿ& L*WeU(ֈj#*uFTT"&Pɔ2P v~P3{|`I(iА) )uDUgbͷA)0g2Ps"GoLA9N_*éKe8} %UX\+:A:OK\+蛾zq*Oy=GrѓUB!pfHm' g.V}DX:}2:W غ r8uԃkEpF-Y/9Nd~ycǎalƌnD o)R[bk$urSbYx{*yi|9tV!hQ\J ts|,^*YՎD,؞!*CAfOD^o>qCbIjeVjVՔVI+q%GAƧũDi+ EP@E5Z$Rwu6RE3'#"xzXsgP4<QRrVfIzq[lBjo̠U(!jlG-Xq<nIbf= !*ۜIiQ5tRV_;,zI AHq+֤ԶT#׹p=vk-b5BҊjc3(f4ew2&T hY O`KH^4G"n{O\rĘp uzᬶI(B@Y&EM96/Pp +a!D)1k{@|T(a J9v#%>kAMúcWp-!SؖohaQV4#sSL C )[tIl{ƺ~3s8}u/LڧnGr|F61_: ofFz4"q`<x[pFla#3x\3'[_lh}u Mš8k_n|TM"1%*prG60a <{; }ig䴈Mû#fnր.R+lxNJI0a{NuA֭;D7 c~ |\bNo6ņ-nJo{Ndv]fɄgCm"=){$6y\8z rJp\_ŒmWĊ9pnBI/G`n0q2*6Ql#5djp ߆ߍ9K*t}2mA_(}D%EcaQz0Q.H[nr[з/Z񿝹pY&:a/<{oÜIW0a;D]K[ꣴR;_.o趠/Tb=xng| &j'>vbI%n_NgIł{R%=<`v'niA„ d߉Kt*`o?q>m b&vjEP V}KzN{P AңC ym>< ozV1W#[\еDqnA7b75R3QlA SBĹC߱s n[O룙#ŗFC%᭰[Bg q \y &jx*l&惭|!x:bq%&-\ׇ)(ַY2lݍ>i:o}݌=q :c(R;q s\.%mAME?[Ĥxu6lm3p7AhIa3A;DCZBL0Q6k@s t䋟=H[y?Fl&a36{0Q760Q7 ZL͈'fu3#ELОD#l&han!6w0Q =L-L]a3A&>vzq 0Qji~0&jFD:A5#`fLԌt0Q3&jFD:A5#`fLԌt0Q32wGc+~;_:o/@|MѿK<uKoW 92vFr\}9[,66Ű^;9־}b)v^l[ZLԓ`-CmWQFj[ (Z^t ۛww)noyvѽaDFD:A5#`fV 9PEJ"! T!޿Z!_U)G"R83[fH ØA]0G"kD#N~=UhR]3nGFJ-\^Ǐ{NaWpܭbĤeD{?&{ccQ߶#[ ('&#zclhbwE|sԭX}pdN B\p_z+? I_qgZ_(uaHйc?=_txe X&SSv>̟$$ńcrj_%?R7|j8^DY)xxp5~k?pRY^DU7;rX #fx'ou3?k3ڞ=y}R`Öo/Bx5-<֡h^ヤ&!'RO:v*gIENqlžvC?"ތ[oDl:.@cT'd|/Ŷ?`|ݼ6)(>q6>xM}<5N\,[A<>q8^{j;%DanaI>9凮GD ~~Ke9tޛ.M7Z!cAoĖg~<=b0Irx5[/}j @ώ1) WP x.* vaߠn{ ]GTH눞۷\@~\u]h@z)5wa tt-Uz>ڼϾ+9uRwh?;]ˉ9ZuSEi`NJ wmCH)~{an 7 氽ys|q}:V˱sP #tp%ݏ~); LB .`|PxaֽePwǻ}]"T۱kkF:WgxR#+KF " EW_SQ@s8xlySGݷ gׂ{RniOkzmG[R4ֽ0ݒ9W#g@)ux8"ūwJlBKZQǮ!J)$EVʱ~TtT} VڳXcvTŕj?JVNzP TVFyGU#ԝ;&B}cd3ТAЇbu-~?wm qJ$y TZ=\%U+@)p۷ k##zwt軽"h#coǑ nߕvÎs#g #C%u]4m1qú{dCxn]8!M)?fG;2;GvNEXtbr9(MIQR$te'婻FU6clGztq̣:+#zdc-CYP7 /=8ḟrJ+cmBO&s凮LJOҎzI9lw z':ЙB9lX4<ةax֊1Ow/rրKgluS1,+J!CX8K*1U&﫟h2]7!}G/r,\~)23q\1 f b"4kNx~SEۨ~)i*[}_=`+3ciڅ;9d`sE0Yؿ?J'qt9b#5عqu34Jˑ&ß\%Drb#: SOz99wOD8p@}.9%mWMڗ. 狟;AT`xH};9܁;W=tW U_ȏ{N:ۅj˵Ƚ U)pmH̝ivT;_h:}*hEmu:h1Qk xnNoS.B)߿]S{O\>g㸋ؔ} 7-"pӟ?.(bOޞ|L+̈́2 o7HHOMW2yMs b I p@ dNI&t2D\iir5Du@zNÏ* &i~3(ƍn }pFsԅOةRQiJAzpj"m V6+,/^ٲïⱺb3ft ZDnQRmb5+AFB@n_g8':z9,44 %>=s"FHҎ*a" j{RB|,,YLžɡԫeA: bփ", j$ Sqw_^ߏ6_8&un{֗A#Tqa 5 o(bWHKmSx`Dz>k GL{-e%Gkf)!:B T`p/hzԢF ^6r0p͡vA,*-mPDRFf@}RڱWC,&[Njj`3aNю5gcSYbH!i]\*W@T&'0<Ԥ!1 8[}%\\Z}<XG$K6(js#CD9·T[ QA{-ڶ?P|sYk-`LU=ϓwAἈrc6KPܢGbxQ9PRҎ*UV;Bp+70j{V \(!M6BM" m7^7-?:@OC5޼-+7"0hW8V ^%@<)qUҨ- WIJE)zQ-(޺V}ec2ХHҎ2[]`*!PiintNюUaWsЪE]K|zD,v.WZ*yFj.98j--*VՓU%~^zI(@9qjI~hW51f%8&%>H82z\ lx:U>Ъ.g [`vWԬ)wbLlifecyclelifecycledeprecateddeprecated labelled/man/reexports.Rd0000644000176200001440000000175014357761455015137 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/labelled.R, R/tagged_na.R \docType{import} \name{reexports} \alias{reexports} \alias{labelled} \alias{is.labelled} \alias{labelled_spss} \alias{print_labels} \alias{\%>\%} \alias{tagged_na} \alias{na_tag} \alias{is_tagged_na} \alias{format_tagged_na} \alias{print_tagged_na} \title{Objects exported from other packages} \keyword{internal} \description{ These objects are imported from other packages. Follow the links below to see their documentation. \describe{ \item{dplyr}{\code{\link[dplyr:reexports]{\%>\%}}} \item{haven}{\code{\link[haven:tagged_na]{format_tagged_na}}, \code{\link[haven:labelled]{is.labelled}}, \code{\link[haven:tagged_na]{is_tagged_na}}, \code{\link[haven]{labelled}}, \code{\link[haven]{labelled_spss}}, \code{\link[haven:tagged_na]{na_tag}}, \code{\link[haven]{print_labels}}, \code{\link[haven:tagged_na]{print_tagged_na}}, \code{\link[haven]{tagged_na}}} }} labelled/man/nolabel_to_na.Rd0000644000176200001440000000065714357761455015705 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_na.R \name{nolabel_to_na} \alias{nolabel_to_na} \title{Recode values with no label to NA} \usage{ nolabel_to_na(x) } \arguments{ \item{x}{Object to recode.} } \description{ For labelled variables, values with no label will be recoded to \code{NA}. } \examples{ v <- labelled(c(1, 2, 9, 1, 9), c(yes = 1, no = 2)) nolabel_to_na(v) } labelled/man/look_for.Rd0000644000176200001440000001225714357761455014722 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/lookfor.R \name{look_for} \alias{look_for} \alias{lookfor} \alias{generate_dictionary} \alias{print.look_for} \alias{look_for_and_select} \alias{convert_list_columns_to_character} \alias{lookfor_to_long_format} \title{Look for keywords variable names and descriptions / Create a data dictionary} \source{ Inspired by the \code{lookfor} command in Stata. } \usage{ look_for( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full") ) lookfor( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full") ) generate_dictionary( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full") ) \method{print}{look_for}(x, ...) look_for_and_select( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE ) convert_list_columns_to_character(x) lookfor_to_long_format(x) } \arguments{ \item{data}{a data frame or a survey object} \item{...}{optional list of keywords, a character string (or several character strings), which can be formatted as a regular expression suitable for a \code{\link[base:grep]{base::grep()}} pattern, or a vector of keywords; displays all variables if not specified} \item{labels}{whether or not to search variable labels (descriptions); \code{TRUE} by default} \item{values}{whether or not to search within values (factor levels or value labels); \code{TRUE} by default} \item{ignore.case}{whether or not to make the keywords case sensitive; \code{TRUE} by default (case is ignored during matching)} \item{details}{add details about each variable (full details could be time consuming for big data frames, \code{FALSE} is equivalent to \code{"none"} and \code{TRUE} to \code{"full"})} \item{x}{a tibble returned by \code{look_for()}} } \value{ a tibble data frame featuring the variable position, name and description (if it exists) in the original data frame } \description{ \code{look_for} emulates the \code{lookfor} Stata command in \R. It supports searching into the variable names of regular \R data frames as well as into variable labels descriptions, factor levels and value labels. The command is meant to help users finding variables in large datasets. } \details{ When no keyword is provided, it will produce a data dictionary of the overall data frame. The function looks into the variable names for matches to the keywords. If available, variable labels are included in the search scope. Variable labels of data.frame imported with \pkg{foreign} or \pkg{memisc} packages will also be taken into account (see \code{\link[=to_labelled]{to_labelled()}}). If no keyword is provided, it will return all variables of \code{data}. \code{look_for()}, \code{lookfor()} and \code{generate_dictionary()} are equivalent. By default, results will be summarized when printing. To deactivate default printing, use \code{dplyr::as_tibble()}. \code{lookfor_to_long_format()} could be used to transform results with one row per factor level and per value label. Use \code{convert_list_columns_to_character()} to convert named list columns into character vectors (see examples). \code{look_for_and_select()} is a shortcut for selecting some variables and applying \code{dplyr::select()} to return a data frame with only the selected variables. } \examples{ look_for(iris) # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") iris \%>\% look_for_and_select("s") \%>\% head() # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") look_for(iris, "pet", "sp", "width") look_for(iris, "Pet", "sp", "width", ignore.case = FALSE) # Look_for can search within factor levels or value labels look_for(iris, "vers") # Quicker search without variable details look_for(iris, details = "none") # To obtain more details about each variable look_for(iris, details = "full") # To deactivate default printing, convert to tibble look_for(iris, details = "full") \%>\% dplyr::as_tibble() # To convert named lists into character vectors look_for(iris) \%>\% convert_list_columns_to_character() # Long format with one row per factor and per value label look_for(iris) \%>\% lookfor_to_long_format() # Both functions can be combined look_for(iris) \%>\% lookfor_to_long_format() \%>\% convert_list_columns_to_character() # Labelled data d <- dplyr::tibble( region = labelled_spss( c(1, 2, 1, 9, 2, 3), c(north = 1, south = 2, center = 3, missing = 9), na_values = 9, label = "Region of the respondent" ), sex = labelled( c("f", "f", "m", "m", "m", "f"), c(female = "f", male = "m"), label = "Sex of the respondent" ) ) look_for(d) d \%>\% look_for() \%>\% lookfor_to_long_format() \%>\% convert_list_columns_to_character() } \author{ François Briatte \href{mailto:f.briatte@gmail.com}{f.briatte@gmail.com}, Joseph Larmarange \href{mailto:joseph@larmarange.net}{joseph@larmarange.net} } labelled/man/update_labelled.Rd0000644000176200001440000000252314357761455016211 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/retrocompatibility.R \name{update_labelled} \alias{update_labelled} \alias{update_labelled.labelled} \alias{update_labelled.haven_labelled_spss} \alias{update_labelled.haven_labelled} \alias{update_labelled.data.frame} \title{Update labelled data to last version} \usage{ update_labelled(x) \method{update_labelled}{labelled}(x) \method{update_labelled}{haven_labelled_spss}(x) \method{update_labelled}{haven_labelled}(x) \method{update_labelled}{data.frame}(x) } \arguments{ \item{x}{An object (vector or data.frame) to convert.} } \description{ Labelled data imported with \pkg{haven} version 1.1.2 or before or created with \code{\link[haven:labelled]{haven::labelled()}} version 1.1.0 or before was using "labelled" and "labelled_spss" classes. } \details{ Since version 2.0.0 of these two packages, "haven_labelled" and "haven_labelled_spss" are used instead. Since haven 2.3.0, "haven_labelled" class has been evolving using now \pkg{vctrs} package. \code{update_labelled()} convert labelled vectors from the old to the new classes and to reconstruct all labelled vectors with the last version of the package. } \seealso{ \code{\link[haven:labelled]{haven::labelled()}}, \code{\link[haven:labelled_spss]{haven::labelled_spss()}} } labelled/man/is_prefixed.Rd0000644000176200001440000000043714357761455015406 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is_prefixed.R \name{is_prefixed} \alias{is_prefixed} \title{Check if a factor is prefixed} \usage{ is_prefixed(x) } \arguments{ \item{x}{a factor} } \description{ Check if a factor is prefixed } labelled/man/val_labels_to_na.Rd0000644000176200001440000000075514357761455016374 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_na.R \name{val_labels_to_na} \alias{val_labels_to_na} \title{Recode value labels to NA} \usage{ val_labels_to_na(x) } \arguments{ \item{x}{Object to recode.} } \description{ For labelled variables, values with a label will be recoded to \code{NA}. } \examples{ v <- labelled(c(1, 2, 9, 1, 9), c(dk = 9)) val_labels_to_na(v) } \seealso{ \code{\link[haven:zap_labels]{haven::zap_labels()}} } labelled/man/unique_tagged_na.Rd0000644000176200001440000000355014357761455016403 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tagged_na.R \name{unique_tagged_na} \alias{unique_tagged_na} \alias{duplicated_tagged_na} \alias{order_tagged_na} \alias{sort_tagged_na} \title{Unique elements, duplicated, ordering and sorting with tagged NAs} \usage{ unique_tagged_na(x, fromLast = FALSE) duplicated_tagged_na(x, fromLast = FALSE) order_tagged_na( x, na.last = TRUE, decreasing = FALSE, method = c("auto", "shell", "radix"), na_decreasing = decreasing, untagged_na_last = TRUE ) sort_tagged_na( x, decreasing = FALSE, na.last = TRUE, na_decreasing = decreasing, untagged_na_last = TRUE ) } \arguments{ \item{x}{a vector} \item{fromLast}{logical indicating if duplication should be considered from the last} \item{na.last}{if \code{TRUE}, missing values in the data are put last; if \code{FALSE}, they are put first} \item{decreasing}{should the sort order be increasing or decreasing?} \item{method}{the method to be used, see \code{\link[base:order]{base::order()}}} \item{na_decreasing}{should the sort order for tagged NAs value be} \item{untagged_na_last}{should untagged \code{NA}s be sorted after tagged \code{NA}s? increasing or decreasing?} } \description{ These adaptations of \code{\link[base:unique]{base::unique()}}, \code{\link[base:duplicated]{base::duplicated()}}, \code{\link[base:order]{base::order()}} and \code{\link[base:sort]{base::sort()}} treats tagged NAs as distinct values. } \examples{ x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x \%>\% print_tagged_na() unique(x) \%>\% print_tagged_na() unique_tagged_na(x) \%>\% print_tagged_na() duplicated(x) duplicated_tagged_na(x) order(x) order_tagged_na(x) sort(x, na.last = TRUE) \%>\% print_tagged_na() sort_tagged_na(x) \%>\% print_tagged_na() } labelled/man/var_label.Rd0000644000176200001440000001115414444541060015013 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/var_label.R \name{var_label} \alias{var_label} \alias{var_label.data.frame} \alias{var_label<-} \alias{get_variable_labels} \alias{set_variable_labels} \alias{label_attribute} \alias{get_label_attribute} \alias{set_label_attribute} \alias{label_attribute<-} \title{Get / Set a variable label} \usage{ var_label(x, ...) \method{var_label}{data.frame}( x, unlist = FALSE, null_action = c("keep", "fill", "skip"), recurse = FALSE, ... ) var_label(x) <- value get_variable_labels(x, ...) set_variable_labels(.data, ..., .labels = NA, .strict = TRUE) label_attribute(x) get_label_attribute(x) set_label_attribute(x, value) label_attribute(x) <- value } \arguments{ \item{x}{a vector or a data.frame} \item{...}{name-value pairs of variable labels (see examples)} \item{unlist}{for data frames, return a named vector instead of a list} \item{null_action}{for data frames, by default \code{NULL} will be returned for columns with no variable label. Use \code{"fill"} to populate with the column name instead, or \code{"skip"} to remove such values from the returned list.} \item{recurse}{if \code{TRUE}, will apply \code{var_label()} on packed columns (see \code{\link[tidyr:pack]{tidyr::pack()}}) to return the variable labels of each sub-column; otherwise, the label of the group of columns will be returned.} \item{value}{a character string or \code{NULL} to remove the label For data frames, with \code{var_labels()}, it could also be a named list or a character vector of same length as the number of columns in \code{x}.} \item{.data}{a data frame or a vector} \item{.labels}{variable labels to be applied to the data.frame, using the same syntax as \code{value} in \code{var_label(df) <- value}.} \item{.strict}{should an error be returned if some labels doesn't correspond to a column of \code{x}?} } \value{ \code{set_variable_labels()} will return an updated copy of \code{.data}. } \description{ Get / Set a variable label } \details{ \code{get_variable_labels()} is identical to \code{var_label()}. For data frames, if you are using \verb{var_label()<-} and if \code{value} is a named list, only elements whose name will match a column of the data frame will be taken into account. If \code{value} is a character vector, labels should be in the same order as the columns of the data.frame. If you are using \verb{label_attribute()<-} or \code{set_label_attribute()} on a data frame, the label attribute will be attached to the data frame itself, not to a column of the data frame. If you are using packed columns (see \code{\link[tidyr:pack]{tidyr::pack()}}), please read the dedicated vignette. } \note{ \code{set_variable_labels()} could be used with \pkg{dplyr} syntax. } \examples{ var_label(iris$Sepal.Length) var_label(iris$Sepal.Length) <- 'Length of the sepal' \dontrun{ View(iris) } # To remove a variable label var_label(iris$Sepal.Length) <- NULL # To change several variable labels at once var_label(iris) <- c( "sepal length", "sepal width", "petal length", "petal width", "species" ) var_label(iris) var_label(iris) <- list( Petal.Width = "width of the petal", Petal.Length = "length of the petal", Sepal.Width = NULL, Sepal.Length = NULL ) var_label(iris) var_label(iris, null_action = "fill") var_label(iris, null_action = "skip") var_label(iris, unlist = TRUE) if (require(dplyr)) { # adding some variable labels df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) \%>\% set_variable_labels(s1 = "Sex", s2 = "Yes or No?") var_label(df) # removing a variable label df <- df \%>\% set_variable_labels(s2 = NULL) var_label(df$s2) # Set labels from dictionary, e.g. as read from external file # One description is missing, one has no match description = tibble( name = c( "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", "Something"), label = c( "Sepal length", "Sepal width", "Petal length", "Petal width", "something") ) var_labels <- setNames(as.list(description$label), description$name) iris_labelled <- iris \%>\% set_variable_labels(.labels = var_labels, .strict = FALSE) var_label(iris_labelled) # defining variable labels derived from variable names if (require(snakecase)) { iris <- iris \%>\% set_variable_labels(.labels = to_sentence_case(names(iris))) var_label(iris) } # example with a vector v <- 1:5 v <- v \%>\% set_variable_labels("a variable label") v v \%>\% set_variable_labels(NULL) } } labelled/man/to_labelled.Rd0000644000176200001440000001040514411254241015324 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_labelled.R \name{to_labelled} \alias{to_labelled} \alias{to_labelled.data.frame} \alias{to_labelled.list} \alias{to_labelled.data.set} \alias{to_labelled.importer} \alias{foreign_to_labelled} \alias{memisc_to_labelled} \alias{to_labelled.factor} \title{Convert to labelled data} \usage{ to_labelled(x, ...) \method{to_labelled}{data.frame}(x, ...) \method{to_labelled}{list}(x, ...) \method{to_labelled}{data.set}(x, ...) \method{to_labelled}{importer}(x, ...) foreign_to_labelled(x) memisc_to_labelled(x) \method{to_labelled}{factor}(x, labels = NULL, .quiet = FALSE, ...) } \arguments{ \item{x}{Factor or dataset to convert to labelled data frame} \item{...}{Not used} \item{labels}{When converting a factor only: an optional named vector indicating how factor levels should be coded. If a factor level is not found in \code{labels}, it will be converted to \code{NA}.} \item{.quiet}{do not display warnings for prefixed factors with duplicated codes} } \value{ A tbl data frame or a labelled vector. } \description{ Convert a factor or data imported with \pkg{foreign} or \pkg{memisc} to labelled data. } \details{ \code{to_labelled()} is a general wrapper calling the appropriate sub-functions. \code{memisc_to_labelled()} converts a \code{memisc::data.set()}]` object created with \pkg{memisc} package to a labelled data frame. \code{foreign_to_labelled()} converts data imported with \code{\link[foreign:read.spss]{foreign::read.spss()}} or \code{\link[foreign:read.dta]{foreign::read.dta()}} from \pkg{foreign} package to a labelled data frame, i.e. using \code{\link[haven:labelled]{haven::labelled()}}. Factors will not be converted. Therefore, you should use \code{use.value.labels = FALSE} when importing with \code{\link[foreign:read.spss]{foreign::read.spss()}} or \code{convert.factors = FALSE} when importing with \code{\link[foreign:read.dta]{foreign::read.dta()}}. To convert correctly defined missing values imported with \code{\link[foreign:read.spss]{foreign::read.spss()}}, you should have used \code{to.data.frame = FALSE} and \code{use.missings = FALSE}. If you used the option \code{to.data.frame = TRUE}, meta data describing missing values will not be attached to the import. If you used \code{use.missings = TRUE}, missing values would have been converted to \code{NA}. So far, missing values defined in \strong{Stata} are always imported as \code{NA} by \code{\link[foreign:read.dta]{foreign::read.dta()}} and could not be retrieved by \code{foreign_to_labelled()}. If you convert a labelled vector into a factor with prefix, i.e. by using \link[=to_factor]{to_factor(levels = "prefixed")}, \code{to_labelled.factor()} is able to reconvert it to a labelled vector with same values and labels. } \examples{ \dontrun{ # from foreign library(foreign) sav <- system.file("files", "electric.sav", package = "foreign") df <- to_labelled(read.spss( sav, to.data.frame = FALSE, use.value.labels = FALSE, use.missings = FALSE )) # from memisc library(memisc) nes1948.por <- UnZip('anes/NES1948.ZIP', 'NES1948.POR', package='memisc') nes1948 <- spss.portable.file(nes1948.por) ds <- as.data.set(nes1948) df <- to_labelled(ds) } # Converting factors to labelled vectors f <- factor( c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") ) to_labelled(f) to_labelled(f, c("yes" = 1, "no" = 2, "don't know" = 9)) to_labelled(f, c("yes" = 1, "no" = 2)) to_labelled(f, c("yes" = "Y", "no" = "N", "don't know" = "DK")) s1 <- labelled(c('M', 'M', 'F'), c(Male = 'M', Female = 'F')) labels <- val_labels(s1) f1 <- to_factor(s1) f1 to_labelled(f1) identical(s1, to_labelled(f1)) to_labelled(f1, labels) identical(s1, to_labelled(f1, labels)) l <- labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) f <- to_factor(l, levels = "p") f to_labelled(f) identical(to_labelled(f), l) } \seealso{ \code{\link[haven:labelled]{haven::labelled()}}, \code{\link[foreign:read.spss]{foreign::read.spss()}}, \code{\link[foreign:read.dta]{foreign::read.dta()}}, \code{memisc::data.set()}, \code{memisc::importer}, \code{\link[=to_factor]{to_factor()}}. } labelled/man/to_character.Rd0000644000176200001440000000502014444041655015522 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/to_character.R \name{to_character} \alias{to_character} \alias{to_character.double} \alias{to_character.haven_labelled} \alias{to_character.data.frame} \title{Convert input to a character vector} \usage{ to_character(x, ...) \method{to_character}{double}(x, explicit_tagged_na = FALSE, ...) \method{to_character}{haven_labelled}( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, ... ) \method{to_character}{data.frame}( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, labelled_only = TRUE, ... ) } \arguments{ \item{x}{Object to coerce to a character vector.} \item{...}{Other arguments passed down to method.} \item{explicit_tagged_na}{should tagged NA be kept?} \item{levels}{What should be used for the factor levels: the labels, the values or labels prefixed with values?} \item{nolabel_to_na}{Should values with no label be converted to \code{NA}?} \item{user_na_to_na}{user defined missing values into NA?} \item{labelled_only}{for a data.frame, convert only labelled variables to factors?} } \description{ By default, \code{to_character()} is a wrapper for \code{\link[base:character]{base::as.character()}}. For labelled vector, to_character allows to specify if value, labels or labels prefixed with values should be used for conversion. } \details{ If some values doesn't have a label, automatic labels will be created, except if \code{nolabel_to_na} is \code{TRUE}. When applied to a data.frame, only labelled vectors are converted by default to character. Use \code{labelled_only = FALSE} to convert all variables to characters. } \examples{ v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) to_character(v) to_character(v, nolabel_to_na = TRUE) to_character(v, "v") to_character(v, "p") df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled( c("a", "a", "b", "c"), labels = c(No = "a", Maybe = "b", Yes = "c") ), d = 1:4, e = factor(c("item1", "item2", "item1", "item2")), f = c("itemA", "itemA", "itemB", "itemB"), stringsAsFactors = FALSE ) if (require(dplyr)) { glimpse(df) glimpse(to_character(df)) glimpse(to_character(df, labelled_only = FALSE)) } } labelled/man/recode.haven_labelled.Rd0000644000176200001440000000671714357761455017301 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/recode.R \name{recode.haven_labelled} \alias{recode.haven_labelled} \title{Recode values} \usage{ \method{recode}{haven_labelled}( .x, ..., .default = NULL, .missing = NULL, .keep_value_labels = TRUE, .combine_value_labels = FALSE, .sep = " / " ) } \arguments{ \item{.x}{A vector to modify} \item{...}{<\code{\link[rlang:dyn-dots]{dynamic-dots}}> Replacements. For character and factor \code{.x}, these should be named and replacement is based only on their name. For numeric \code{.x}, these can be named or not. If not named, the replacement is done based on position i.e. \code{.x} represents positions to look for in replacements. See examples. When named, the argument names should be the current values to be replaced, and the argument values should be the new (replacement) values. All replacements must be the same type, and must have either length one or the same length as \code{.x}.} \item{.default}{If supplied, all values not otherwise matched will be given this value. If not supplied and if the replacements are the same type as the original values in \code{.x}, unmatched values are not changed. If not supplied and if the replacements are not compatible, unmatched values are replaced with \code{NA}. \code{.default} must be either length 1 or the same length as \code{.x}.} \item{.missing}{If supplied, any missing values in \code{.x} will be replaced by this value. Must be either length 1 or the same length as \code{.x}.} \item{.keep_value_labels}{If TRUE, keep original value labels. If FALSE, remove value labels.} \item{.combine_value_labels}{If TRUE, will combine original value labels to generate new value labels. Note that unexpected results could be obtained if a same old value is recoded into several different new values.} \item{.sep}{Separator to be used when combining value labels.} } \description{ Extend \code{\link[dplyr:recode]{dplyr::recode()}} method from \pkg{dplyr} to works with labelled vectors. } \examples{ x <- labelled(1:3, c(yes = 1, no = 2)) x dplyr::recode(x, `3` = 2L) # do not keep value labels dplyr::recode(x, `3` = 2L, .keep_value_labels = FALSE) # be careful, changes are not of the same type (here integers), # NA arecreated dplyr::recode(x, `3` = 2) # except if you provide .default or new values for all old values dplyr::recode(x, `1` = 1, `2` = 1,`3` = 2) # if you change the type of the vector (here transformed into character) # value labels are lost dplyr::recode(x, `3` = "b", .default = "a") # use .keep_value_labels = FALSE to avoid a warning dplyr::recode(x, `3` = "b", .default = "a", .keep_value_labels = FALSE) # combine value labels x <- labelled( 1:4, c( "strongly agree" = 1, "agree" = 2, "disagree" = 3, "strongly disagree" = 4) ) dplyr::recode( x, `1` = 1L, `2` = 1L, `3` = 2L, `4` = 2L, .combine_value_labels = TRUE ) dplyr::recode( x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE ) dplyr::recode( x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE, .sep = " or " ) dplyr::recode( x, `2` = 1L, .default = 2L, .combine_value_labels = TRUE ) # example when combining some values without a label y <- labelled(1:4, c("strongly agree" = 1)) dplyr::recode(y, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) } \seealso{ \code{\link[dplyr:recode]{dplyr::recode()}} } labelled/man/copy_labels.Rd0000644000176200001440000000360514357761455015401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy_labels.R \name{copy_labels} \alias{copy_labels} \alias{copy_labels_from} \title{Copy variable and value labels and SPSS-style missing value} \usage{ copy_labels(from, to, .strict = TRUE) copy_labels_from(to, from, .strict = TRUE) } \arguments{ \item{from}{A vector or a data.frame (or tibble) to copy labels from.} \item{to}{A vector or data.frame (or tibble) to copy labels to.} \item{.strict}{When \code{from} is a labelled vector, \code{to} have to be of the same type (numeric or character) in order to copy value labels and SPSS-style missing values. If this is not the case and \code{.strict = TRUE}, an error will be produced. If \code{.strict = FALSE}, only variable label will be copied.} } \description{ This function copies variable and value labels (including missing values) from one vector to another or from one data frame to another data frame. For data frame, labels are copied according to variable names, and only if variables are the same type in both data frames. } \details{ Some base \R functions like \code{\link[base:subset]{base::subset()}} drop variable and value labels attached to a variable. \code{copy_labels} could be used to restore these attributes. \code{copy_labels_from} is intended to be used with \pkg{dplyr} syntax, see examples. } \examples{ library(dplyr) df <- tibble( id = 1:3, happy = factor(c('yes', 'no', 'yes')), gender = labelled(c(1, 1, 2), c(female = 1, male = 2)) ) \%>\% set_variable_labels( id = "Individual ID", happy = "Are you happy?", gender = "Gender of respondent" ) var_label(df) fdf <- df \%>\% filter(id < 3) var_label(fdf) # some variable labels have been lost fdf <- fdf \%>\% copy_labels_from(df) var_label(fdf) # Alternative syntax fdf <- subset(df, id < 3) fdf <- copy_labels(from = df, to = fdf) } labelled/man/val_labels.Rd0000644000176200001440000000664114444527456015212 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/val_labels.R \name{val_labels} \alias{val_labels} \alias{val_labels<-} \alias{val_label} \alias{val_label<-} \alias{get_value_labels} \alias{set_value_labels} \alias{add_value_labels} \alias{remove_value_labels} \title{Get / Set value labels} \usage{ val_labels(x, prefixed = FALSE) val_labels(x) <- value val_label(x, v, prefixed = FALSE) val_label(x, v) <- value get_value_labels(x, prefixed = FALSE) set_value_labels(.data, ..., .labels = NA, .strict = TRUE) add_value_labels(.data, ..., .strict = TRUE) remove_value_labels(.data, ..., .strict = TRUE) } \arguments{ \item{x}{A vector or a data.frame} \item{prefixed}{Should labels be prefixed with values?} \item{value}{A named vector for \code{val_labels()} (see \code{\link[haven:labelled]{haven::labelled()}}) or a character string for \code{val_label()}. \code{NULL} to remove the labels. For data frames, it could also be a named list with a vector of value labels per variable.} \item{v}{A single value.} \item{.data}{a data frame or a vector} \item{...}{name-value pairs of value labels (see examples)} \item{.labels}{value labels to be applied to the data.frame, using the same syntax as \code{value} in \code{val_labels(df) <- value}.} \item{.strict}{should an error be returned if some labels doesn't correspond to a column of \code{x}?} } \value{ \code{val_labels()} will return a named vector. \code{val_label()} will return a single character string. \code{set_value_labels()}, \code{add_value_labels()} and \code{remove_value_labels()} will return an updated copy of \code{.data}. } \description{ Get / Set value labels } \note{ \code{get_value_labels()} is identical to \code{val_labels()}. \code{set_value_labels()}, \code{add_value_labels()} and \code{remove_value_labels()} could be used with \pkg{dplyr} syntax. While \code{set_value_labels()} will replace the list of value labels, \code{add_value_labels()} and \code{remove_value_labels()} will update that list (see examples). \code{set_value_labels()} could also be applied to a vector / a data.frame column. In such case, you can provide a vector of value labels using \code{.labels} or several name-value pairs of value labels (see example). Similarly, \code{add_value_labels()} and \code{remove_value_labels()} could also be applied on vectors. } \examples{ v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) val_labels(v) val_labels(v, prefixed = TRUE) val_label(v, 2) val_label(v, 2) <- 'maybe' val_label(v, 9) <- NULL val_labels(v) <- NULL if (require(dplyr)) { # setting value labels df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) \%>\% set_value_labels( s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2) ) val_labels(df) # updating value labels df <- df \%>\% add_value_labels(s2 = c(Unknown = 9)) df$s2 # removing a value labels df <- df \%>\% remove_value_labels(s2 = 9) df$s2 # removing all value labels df <- df \%>\% set_value_labels(s2 = NULL) df$s2 # example on a vector v <- 1:4 v <- set_value_labels(v, min = 1, max = 4) v v \%>\% set_value_labels(middle = 3) v \%>\% set_value_labels(NULL) v \%>\% set_value_labels(.labels = c(a = 1, b = 2, c= 3, d = 4)) v \%>\% add_value_labels(between = 2) v \%>\% remove_value_labels(4) } } labelled/DESCRIPTION0000644000176200001440000000267414444603422013540 0ustar liggesusersPackage: labelled Type: Package Title: Manipulating Labelled Data Version: 2.12.0 Maintainer: Joseph Larmarange Authors@R: c( person("Joseph", "Larmarange", email = "joseph@larmarange.net", role = c("aut", "cre"), comment = c(ORCID = "0000-0001-7097-700X")), person("Daniel", "Ludecke", role = "ctb"), person("Hadley", "Wickham", role = "ctb"), person("Michal", "Bojanowski", role = "ctb"), person("François", "Briatte", role = "ctb") ) Description: Work with labelled data imported from 'SPSS' or 'Stata' with 'haven' or 'foreign'. This package provides useful functions to deal with "haven_labelled" and "haven_labelled_spss" classes introduced by 'haven' package. License: GPL (>= 3) Encoding: UTF-8 Depends: R (>= 3.0) Imports: haven (>= 2.4.1), dplyr (>= 1.0.0), lifecycle, rlang, vctrs, stringr, tidyr Suggests: testthat, knitr, rmarkdown, questionr, snakecase, utf8, covr, spelling Enhances: memisc URL: https://larmarange.github.io/labelled/ BugReports: https://github.com/larmarange/labelled/issues VignetteBuilder: knitr LazyData: true RoxygenNote: 7.2.3 RdMacros: lifecycle Language: en-US NeedsCompilation: no Packaged: 2023-06-21 13:32:27 UTC; josep Author: Joseph Larmarange [aut, cre] (), Daniel Ludecke [ctb], Hadley Wickham [ctb], Michal Bojanowski [ctb], François Briatte [ctb] Repository: CRAN Date/Publication: 2023-06-21 14:20:02 UTC labelled/build/0000755000176200001440000000000014444575742013135 5ustar liggesuserslabelled/build/vignette.rds0000644000176200001440000000057214444575742015500 0ustar liggesusersSQk0sS&`/ o8`146g $~K3C+K)cg^|f] oZ)ޘ9/6 nPP?'爐S'u+Tp"p^fHlabelled/tests/0000755000176200001440000000000014357761455013201 5ustar liggesuserslabelled/tests/spelling.R0000644000176200001440000000023314357761455015137 0ustar liggesusersif (requireNamespace("spelling", quietly = TRUE)) spelling::spell_check_test( vignettes = TRUE, error = FALSE, skip_on_cran = TRUE ) labelled/tests/testthat/0000755000176200001440000000000014444603422015023 5ustar liggesuserslabelled/tests/testthat/test-copy_labels.r0000644000176200001440000000641214357761455020500 0ustar liggesuserscontext("Test copy_labels()") test_that("copy_labels() copy variable / value labels and missing values", { x <- labelled( c(1, 1, 2), labels = c(Male = 1, Female = 2), label = "Assigned sex at birth" ) y <- copy_labels(x, 1:3) expect_equal(var_label(x), var_label(y)) expect_equal(val_labels(x), val_labels(y)) expect_equal(na_range(x), na_range(y)) expect_equal(na_values(x), na_values(y)) x <- labelled_spss( 1:10, labels = c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, 19), label = "variable label" ) y <- 10:1 %>% copy_labels_from(x) expect_equal(var_label(x), var_label(y)) expect_equal(val_labels(x), val_labels(y)) expect_equal(na_range(x), na_range(y)) expect_equal(na_values(x), na_values(y)) x <- dplyr::tibble( a = labelled(c(1, 1, 2), c(yes = 1, no = 2)), b = labelled_spss(1:3, c(top = 1, bottom = 2), na_values = 3L), c = c("a", "b", "c") ) %>% set_variable_labels( a = "variable a", b = "variable B", c = "third variable" ) y <- dplyr::tibble( c = factor(c("a", "b", "c")), b = 2, a = 1:3, d = 9:7 ) %>% copy_labels_from(x) expect_equal(var_label(x$a), var_label(y$a)) expect_equal(val_labels(x$a), val_labels(y$a)) expect_equal(na_range(x$a), na_range(y$a)) expect_equal(na_values(x$a), na_values(y$a)) expect_equal(var_label(x$b), var_label(y$b)) expect_equal(val_labels(x$b), val_labels(y$b)) expect_equal(na_range(x$b), na_range(y$b)) expect_equal(na_values(x$b), na_values(y$b)) expect_equal(var_label(x$c), var_label(y$c)) }) test_that("if 'from' is not a labelled vector, copy only variable label", { # regardless of the class of 'to' x <- 1:10 var_label(x) <- "variable label" y <- 10:1 %>% copy_labels_from(x) expect_equal(var_label(x), var_label(y)) x <- factor(1:10) var_label(x) <- "variable label" y <- 10:1 %>% as.character() %>% copy_labels_from(x) expect_equal(var_label(x), var_label(y)) }) test_that("copy_labels checks", { # do not work with a list expect_error( copy_labels(list(1, 2), 1:2) ) expect_error( copy_labels(1:2, list(1, 2)) ) # if from is a data.frame, to should also be a data.frame expect_error( copy_labels(iris, 1:2) ) expect_error( copy_labels(1:2, iris) ) # if from is a labelled vector, to should have the same type x <- labelled( c(1, 1, 2), labels = c(Male = 1, Female = 2), label = "Assigned sex at birth" ) expect_error( copy_labels(x, c("1", "2")) ) x <- labelled_spss( 1:10, labels = c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, 19), label = "variable label" ) expect_error( copy_labels(x, c("1", "2")) ) # except if .strict = FALSE x <- labelled( c(1, 1, 2), labels = c(Male = 1, Female = 2), label = "Assigned sex at birth" ) expect_error( copy_labels(x, c("1", "2"), .strict = FALSE), NA ) x <- labelled_spss( 1:10, labels = c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, 19), label = "variable label" ) expect_error( copy_labels(x, c("1", "2"), .strict = FALSE), NA ) }) labelled/tests/testthat/test-miscellanous.R0000644000176200001440000000076714357761455020651 0ustar liggesuserscontext("miscellanous") # is_prefixed ------------------------------------------------------------- test_that("error with non factor argument", { x <- 1:2 expect_error(is_prefixed("x")) }) test_that("is_prefixed() works properly", { x <- labelled( c(1, 2, 2, 2, 9, 1, 2, NA), c(yes = 1, no = 2, "don't know" = 9) ) tfx <- to_factor(x, levels = "prefixed") expect_true(is_prefixed(tfx)) levels(tfx)[1] <- "not prefixed" expect_false(is_prefixed(tfx)) }) labelled/tests/testthat/test-tagged_na.r0000644000176200001440000000510614357761455020114 0ustar liggesuserscontext("Tests related to tagged NAs") test_that("unique_tagged_na(), duplicated_tagged_na, order_tagged_na and sort_tagged_na work as expected", { # nolint x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) expect_equal( unique_tagged_na(x) %>% format_tagged_na() %>% trimws(), c("1", "2", "NA(a)", "NA(z)", "NA") ) expect_equal( unique_tagged_na(x, fromLast = TRUE) %>% format_tagged_na() %>% trimws(), c("1", "NA(z)", "2", "NA(a)", "NA") ) expect_equal( duplicated_tagged_na(x), c(FALSE, FALSE, FALSE, TRUE, FALSE, TRUE, TRUE, FALSE) ) expect_equal( duplicated_tagged_na(x, fromLast = TRUE), c(TRUE, TRUE, TRUE, FALSE, FALSE, FALSE, FALSE, FALSE) ) expect_equal( sort_tagged_na(x) %>% format_tagged_na() %>% trimws(), c("1", "1", "2", "2", "NA(a)", "NA(a)", "NA(z)", "NA") ) expect_equal( sort_tagged_na(x, decreasing = TRUE) %>% format_tagged_na() %>% trimws(), c("2", "2", "1", "1", "NA(z)", "NA(a)", "NA(a)", "NA") ) expect_equal( sort_tagged_na(x, na_decreasing = TRUE) %>% format_tagged_na() %>% trimws(), c("1", "1", "2", "2", "NA(z)", "NA(a)", "NA(a)", "NA") ) expect_equal( sort_tagged_na(x, untagged_na_last = FALSE) %>% format_tagged_na() %>% trimws(), c("1", "1", "2", "2", "NA", "NA(a)", "NA(a)", "NA(z)") ) }) test_that("tagged_na_to_user_na() works as expected", { x <- c( 1, 0, 1, tagged_na("r"), 0, tagged_na("d"), NA, tagged_na("d"), tagged_na("e") ) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) expect_equal( tagged_na_to_user_na(x), labelled_spss( c(1, 0, 1, 4, 0, 2, NA, 2, 3), labels = c(no = 0, yes = 1, `don't know` = 2, refusal = 4, `NA(e)` = 3), na_range = c(2, 4) ) ) expect_equal( tagged_na_to_user_na(x, user_na_start = 8), labelled_spss( c(1, 0, 1, 10, 0, 8, NA, 8, 9), labels = c(no = 0, yes = 1, `don't know` = 8, refusal = 10, `NA(e)` = 9), na_range = c(8, 10) ) ) }) test_that("tagged_na_to_regular_na() works as expected", { y <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d")) val_labels(y) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) expect_false(any(tagged_na_to_regular_na(y) %>% is_tagged_na())) test <- rep(c(-99, -99, 3, 5, -1), 120) labelled::na_values(test) <- c(-99, -1) expect_warning( user_na_to_tagged_na(test), NA ) }) labelled/tests/testthat/test-na_values.R0000644000176200001440000001176414357761455020127 0ustar liggesuserscontext("na_values") # na_values -------------------------------------------------------------------- test_that("na_values works with data.frame", { xhs <- haven::labelled_spss( c(1, 2, 3, NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_value = 99, label = "variable label" ) y <- c(1:4, NA) df <- data.frame(xhs = xhs, y = y, stringsAsFactors = FALSE) res <- list(xhs = 99, y = NULL) expect_equal(na_values(df), res) }) # na_range -------------------------------------------------------------------- test_that("na_range works with data.frame", { xhs <- haven::labelled_spss( c(1, 2, 3, NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_value = 99, na_range = c(99, Inf), label = "variable label" ) y <- c(1:4, NA) df <- data.frame(xhs = xhs, y = y, stringsAsFactors = FALSE) res <- list(xhs = c(99, Inf), y = NULL) expect_equal(na_range(df), res) }) # user_na_to_na ---------------------------------------------------------------- test_that("user_na_to_na works with data.frame", { xhs <- haven::labelled_spss( c(c(1, 2, 3), NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_value = 99, na_range = c(99, Inf), label = "variable label" ) y <- c(1:4, NA) df <- data.frame(xhs = xhs, y = y, stringsAsFactors = FALSE) una_df <- user_na_to_na(df) expect_equal(df$y, y) expect_null(na_values(una_df$x)) expect_null(na_range(una_df$x)) }) # set_na_values ---------------------------------------------------------------- test_that("set_na_values works correctly", { df <- dplyr::tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) sna_svdf <- set_na_values(svdf, s2 = 9) expect_equal(which(is.na(sna_svdf$s2)), 4L) expect_error(set_na_values(svdf, s4 = 8, s2 = 9)) sna_svdfF <- set_na_values(svdf, s4 = 8, s2 = 9, .strict = FALSE) expect_equal(sna_svdf, sna_svdfF) expect_error(set_na_values(svdf, .values = list(s2 = 9, s4 = 3))) expect_error(set_na_values(svdf, .values = c(s2 = 9))) snu_svdf <- set_na_values(sna_svdfF, s2 = NULL) expect_equal(snu_svdf, svdf) snu_svdf <- set_na_values(sna_svdf, s2 = NULL) expect_equal(snu_svdf, svdf) df <- dplyr::tibble(s1 = c(2, 4, 7, 9), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) sna_svdf <- set_na_values(svdf, .values = 9L) expect_equal(na_values(sna_svdf), list(s1 = 9, s2 = 9)) sna_svdfF <- set_na_values( svdf, .values = list(s1 = 9, s3 = 2), .strict = FALSE ) expect_equal(na_values(sna_svdfF), list(s1 = 9, s2 = NULL)) }) # set_na_range ----------------------------------------------------------------- test_that("set_na_range works correctly", { df <- dplyr::tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) expect_error(set_na_range(svdf, s2 = 9)) expect_error(set_na_range(svdf, n2 = c(9, Inf))) snr_svdf <- set_na_range(svdf, s2 = c(9, Inf)) expect_equal(na_range(snr_svdf$s2), c(9, Inf)) expect_error(set_na_range(svdf, s2 = c(9, Inf), s4 = c(0, 10))) snr_svdfF <- set_na_range(svdf, s2 = c(9, Inf), s4 = c(0, 10), .strict = FALSE) expect_equal(snr_svdfF, snr_svdf) expect_error(set_na_range(svdf, .values = c(s2 = 9))) expect_error( set_na_range(svdf, .values = list(s2 = c(9, Inf), s4 = c(0, 10))) ) snrv_svdf <- set_na_range(svdf, .values = list(s2 = c(9, Inf))) expect_equal(snrv_svdf, snr_svdf) snrv_svdfF <- set_na_range( svdf, .values = list(s2 = c(9, Inf), s4 = c(0, 10)), .strict = FALSE ) expect_equal(snrv_svdfF, snr_svdf) df <- dplyr::tibble(s1 = c(2, 4, 7, 9), s2 = c(1, 1, 2, 9)) svdf <- set_value_labels(df, s2 = c(yes = 1, no = 2)) sna_svdf <- set_na_range(svdf, .values = c(9L, 100L)) expect_equal(na_range(sna_svdf), list(s1 = c(9, 100), s2 = c(9, 100))) x <- factor(1:5) expect_error(na_values(x) <- 1) expect_error(na_range(x) <- 4:5) v <- 1:10 v <- set_na_range(v, 3, 5) v <- set_na_values(v, 8, 9) expect_equal(na_range(v), c(3, 5)) expect_equal(na_values(v), c(8, 9)) }) test_that("about user NAs", { v <- labelled_spss( c(1, 2, 9, 3, 9, 1, NA), labels = c(yes = 1, no = 3, "don't know" = 9), na_values = 9 ) expect_equal( is.na(v), c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, TRUE) ) expect_equal( is_user_na(v), c(FALSE, FALSE, TRUE, FALSE, TRUE, FALSE, FALSE) ) expect_equal( user_na_to_tagged_na(v) %>% format_tagged_na() %>% trimws(), c("1", "2", "NA(a)", "3", "NA(a)", "1", "NA") ) expect_equal( user_na_to_na(letters), letters ) x <- c(NA, 9, tagged_na("a")) na_values(x) <- 9 expect_equal(is.na(x), c(TRUE, TRUE, TRUE)) expect_equal(is_regular_na(x), c(TRUE, FALSE, FALSE)) expect_equal(is_user_na(x), c(FALSE, TRUE, FALSE)) expect_equal(is_tagged_na(x), c(FALSE, FALSE, TRUE)) }) labelled/tests/testthat/test-to_labelled.r0000644000176200001440000001520014444041655020433 0ustar liggesuserscontext("Test to_labelled()") test_that("to_labelled.factor preserves variable label", { x <- factor(c(1, 1, 2)) var_label(x) <- "test" expect_equal(var_label(to_labelled(x)), var_label(x)) x <- factor(c("no", "yes", "no")) var_label(x) <- "test" expect_equal( var_label(to_labelled(x, labels = c("yes" = 1, "no" = 2))), var_label(x) ) }) test_that("to_labelled.factor preserves labelled character vectors", { s1 <- labelled(c("M", "M", "F"), c(Male = "M", Female = "F")) expect_equal(s1, to_labelled(to_factor(s1), val_labels(s1))) }) test_that("to_labelled.factor preserves labelled numerical vectors", { s2 <- labelled(c(1, 1, 2), c(Male = 1, Female = 2)) expect_equal(s2, to_labelled(to_factor(s2), val_labels(s2))) }) test_that("to_labelled.factor converts to NA factor levels not found in labels", { #nolint f <- factor( c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") ) expect_equal( to_labelled(f, c("yes" = 1, "no" = 2)), labelled(c(1, 1, 2, 2, NA, 2, 1, NA), c("yes" = 1, "no" = 2)) ) }) test_that("to_labelled.factor accepts non continuous labels", { f <- factor( c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") ) expect_equal( to_labelled(f, c("yes" = 1, "no" = 2, "don't know" = 9)), labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) ) }) test_that("to_labelled.factor works with '[code] label' factors", { l <- labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) expect_equal( to_factor(l, levels = "p") %>% to_labelled(), l ) l <- labelled( c("M", "M", "F", "X", "N/A"), c(Male = "M", Female = "F", Refused = "X", "Not applicable" = "N/A") ) expect_equal( to_factor(l, levels = "p") %>% to_labelled(), l ) # if labels is provided apply normal rule l <- labelled( c(1, 1, 2, 2, 9, 2, 1, 9), c("yes" = 1, "no" = 2, "don't know" = 9) ) f <- to_factor(l, levels = "p") x <- f %>% to_labelled(labels = c("[1] yes" = 123, "[2] no" = 456)) expect_equivalent( unclass(x), c(123, 123, 456, 456, NA, 456, 123, NA) ) # should not be applied if duplicates in code f <- factor(c("[1] yes", "[2] no", "[1] don't know")) expect_warning(l <- to_labelled(f)) expect_warning(l <- to_labelled(f, .quiet = TRUE), NA) expect_identical( names(val_labels(l)), levels(f) ) # check potential duplicates in numerical codes f <- factor(c("[1] yes", "[1.0] no", "[01] don't know")) expect_warning(to_labelled(f)) expect_warning(to_labelled(f, .quiet = TRUE), NA) expect_true(is.character(to_labelled(f, .quiet = TRUE))) }) # foreign_to_labelled ----------------------------------------------------- test_that("foreign_to_labelled works correctly", { utils::data("spss_file", package = "labelled") utils::data("dta_file", package = "labelled") tl_spss_list <- to_labelled(spss_file) expect_equal( val_labels(tl_spss_list), sapply(spss_file, function(x) attr(x, "value.labels", exact = TRUE)) ) expect_equal( var_label(tl_spss_list), as.list(attr(spss_file, "variable.labels", exact = TRUE)) ) miss_attr <- attr(spss_file, "missings", exact = TRUE) miss_list <- lapply( miss_attr, function(x) if (x$type == "none") return(NULL) else return(x$value) ) expect_equal(sapply(tl_spss_list, na_values), miss_list) expect_true( all( which(sapply(tl_spss_list, function(x) any(is.na(x)))) == c(4, 5, 7, 10) ) ) tl_spss_df <- to_labelled(as.data.frame(spss_file, stringsAsFactors = FALSE)) expect_equal( val_labels(tl_spss_df), sapply(spss_file, function(x) attr(x, "value.labels", exact = TRUE)) ) expect_true(all(sapply(var_label(tl_spss_df), is.null))) expect_true(all(sapply(sapply(tl_spss_df, na_values), is.null))) expect_true(all(sapply(sapply(tl_spss_df, na_range), is.null))) tl_dta_df <- to_labelled(dta_file) expect_equal( val_labels(tl_dta_df), sapply(dta_file, function(x) attr(x, "value.labels", exact = TRUE)) ) expect_equal( unname(unlist(var_label(tl_dta_df))), attr(dta_file, "var.labels", exact = TRUE) ) expect_true(all(sapply(sapply(tl_dta_df, na_values), is.null))) expect_true(all(sapply(sapply(tl_dta_df, na_range), is.null))) }) # memisc_to_labelled ----------------------------------------------------- test_that("memisc_to_labelled works correctly", { skip_if_not_installed("memisc") ds <- memisc::data.set( vote = sample(c(1, 2, 3, 8, 9, 97, 99), size = 300, replace = TRUE), region = sample(c(rep(1, 3), rep(2, 2), 3, 99), size = 300, replace = TRUE), income = exp(rnorm(300, sd = .7)) * 2000 ) memisc::description(ds$vote) <- "Vote intention" memisc::description(ds$region) <- "Region of residence" memisc::description(ds$income) <- "Household income" memisc::missing.values(ds$vote) <- c(97, 99) memisc::missing.values(ds$region) <- list(range = c(90, Inf)) memisc::labels(ds$region) <- c( England = 1, Scotland = 2, Wales = 3, "Not applicable" = 97, "Not asked in survey" = 99) memisc::labels(ds$vote) <- c( Conservatives = 1, Labour = 2, "Liberal Democrats" = 3, "Don't know" = 8, "Answer refused" = 9, "Not applicable" = 97, "Not asked in survey" = 99) tl_ds <- to_labelled(ds) desc <- data.frame(memisc::description(ds)) var_label_ds <- desc[, 2] names(var_label_ds) <- desc[, 1] expect_identical(unlist(var_label(tl_ds)), var_label_ds) if (any(sapply(val_labels(tl_ds), function(x) !is.null(x)))) { val_labels_ds <- lapply(ds, function(x) memisc::labels(x)) val_labels_ds <- lapply(ds, function(x) { vlabs <- memisc::labels(x) if (is.null(vlabs)) return(NULL) vals <- vlabs@values names(vals) <- vlabs@.Data return(vals) }) expect_identical(val_labels(tl_ds), val_labels_ds) } }) test_that("to_character works on data.frame", { df <- data.frame( x = labelled(c(1, 1, 2), c(yes = 1, no = 2)), y = c("a", "a", "b"), z = 1:3, stringsAsFactors = FALSE ) df2 <- to_character(df) expect_true(is.character(df2$x)) expect_equal(class(df2$y), class(df$y)) expect_equal(class(df2$z), class(df$z)) df3 <- to_character(df, labelled_only = FALSE) expect_true(is.character(df3$y)) expect_true(is.character(df3$z)) }) labelled/tests/testthat/test_lookfor.R0000644000176200001440000001066714361242573017676 0ustar liggesuserscontext("Test look_for()") test_that("look_for works correctly", { df <- data.frame( 1:3, letters[1:3], fix.empty.names = FALSE, stringsAsFactors = FALSE ) expect_error(look_for(df)) expect_error(look_for(unname(df))) df <- data.frame(num = 1:3, ch = letters[1:3], stringsAsFactors = FALSE) res <- look_for(df, "e") capture.output(print(res)) expect_true(nrow(res) == 0) }) test_that("look_for works with a single keyword.", { expect_equal( look_for(iris, "sep")$variable, c("Sepal.Length", "Sepal.Width") ) lfi <- look_for(iris, "s") expect_equal( lfi$variable, c("Sepal.Length", "Sepal.Width", "Species") ) expect_equal( lfi$levels, list( "Sepal.Length" = NULL, "Sepal.Width" = NULL, "Species" = levels(iris$Species) ) ) expect_equal(lfi$variable, names(iris)[lfi$pos]) expect_error( look_for(iris, "petal") %>% dplyr::select(pos) %>% print(), NA ) }) test_that("look_for works with no single keyword.", { expect_equal( look_for(iris, details = TRUE)$variable, names(iris) ) }) test_that("look_for works with a regular expression", { lfi <- look_for(iris, "s") expect_identical(look_for(iris, "sepal|species"), lfi) lfi <- look_for(iris, "s$") expect_identical( lfi$levels[[lfi$variable]], levels(iris$Species) ) }) test_that("look_for works with several keywords", { expect_equal( look_for(iris, details = "none", "s", "w")$variable, c("Sepal.Length", "Sepal.Width", "Petal.Width", "Species") ) expect_equal( look_for(iris, "Pet", "sp", "width", ignore.case = FALSE)$variable, c("Petal.Length", "Petal.Width") ) }) test_that(" look_for with different details parameter values", { expect_false("levels" %in% names(look_for(iris, details = "none"))) expect_false("range" %in% names(look_for(iris, "Sep"))) expect_equal( look_for(iris, details = TRUE, "sep")$range, list( Sepal.Length = range(iris$Sepal.Length), Sepal.Width = range(iris$Sepal.Width) ) ) }) test_that(" convert_list_columns_to_character works correctly", { lfi_conv <- look_for(iris, "spe", details = TRUE) %>% convert_list_columns_to_character() expect_equal( unname(lfi_conv$levels), paste(levels(iris$Species), collapse = "; ") ) lfi_conv <- look_for(iris, "al", details = TRUE) %>% convert_list_columns_to_character() expect_identical( lfi_conv$range, sapply( lapply(iris[, lfi_conv$variable], range), function(x) paste(x, collapse = " - ") ) ) lfi_conv <- look_for(iris, "sep") %>% convert_list_columns_to_character() expect_true(all(lfi_conv$levels == c("", ""))) expect_true(all(lfi_conv$value_labels == c("", ""))) }) test_that(" look_for_and_select works correctly", { expect_equal( names(look_for_and_select(iris, "sep")), c("Sepal.Length", "Sepal.Width") ) }) test_that(" print.look_for works correctly", { pp <- print(look_for(iris)) expect_equal( pp$variable[nchar(pp$variable) != 0], names(iris) ) expect_equal( pp$values[nchar(pp$values) != 0], levels(iris$Species) ) }) test_that(" lookfor_to_long_format works correctly", { lf2lf <- look_for(iris) %>% lookfor_to_long_format() expect_equal( lf2lf$levels[lf2lf$variable == "Species"], levels(iris$Species) ) expect_equal( iris, lookfor_to_long_format(iris) ) expect_true(all(is.na(lf2lf$levels[lf2lf$variable != "Species"]))) }) test_that("look_for get var_label", { df <- data.frame(col1 = 1:2, col2 = 3:4, stringsAsFactors = FALSE) expect_equal(nrow(look_for(df, "lb")), 0) var_label(df) <- c("lb1", "lb2") lfd <- look_for(df, "lb") expect_equal( lfd$variable, names(df) ) expect_equal( unname(lfd$label), c("lb1", "lb2") ) }) test_that("look_for works with factor levels and value labels", { res <- iris %>% look_for("vers", details = "none") expect_equal(res$variable, "Species") res <- iris %>% look_for("vers", details = "none", values = FALSE) expect_equal(nrow(res), 0) df <- iris df$Species <- to_labelled(df$Species) res <- df %>% look_for("vers", details = "none") expect_equal(res$variable, "Species") res <- df %>% look_for("vers", details = "none", values = FALSE) expect_equal(nrow(res), 0) }) labelled/tests/testthat/test-labelled.r0000644000176200001440000010470614444527456017753 0ustar liggesuserscontext("Labelled") # var_label -------------------------------------------------------------- test_that("var_label works properly", { x <- 1:3 var_label(x) <- "value" expect_equal(attr(x, "label"), "value") expect_equal(var_label(x), "value") var_label(x) <- NULL expect_null(attr(x, "label")) expect_null(var_label(x)) x <- 1:3 x <- set_variable_labels(x, "value") expect_equal(attr(x, "label"), "value") x <- set_variable_labels(x, .labels = "other value") expect_equal(attr(x, "label"), "other value") x <- set_variable_labels(x, NULL) expect_null(attr(x, "label")) }) test_that("var_label works on data.frame", { df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = FALSE) var_label(df$x) <- "var x" expect_equal(var_label(df$x), "var x") expect_equal(var_label(df), list(x = "var x", y = NULL)) var_label(df) <- list(y = "YY", x = "XX") expect_equal(var_label(df), list(x = "XX", y = "YY")) var_label(df) <- NULL expect_equal(var_label(df), list(x = NULL, y = NULL)) var_label(df) <- c("var1", "var2") expect_equal(var_label(df), list(x = "var1", y = "var2")) df <- set_variable_labels(df, x = "XX", .labels = "other") expect_equal(var_label(df), list(x = "XX", y = "other")) df <- set_variable_labels(df, .labels = c("var1", "var2")) expect_equal(var_label(df), list(x = "var1", y = "var2")) }) test_that("var_label produce appropriate errors", { df <- data.frame(x = 1:3, y = c("a", "b", "c"), stringsAsFactors = FALSE) expect_error(var_label(df) <- c("var1", "var2", "var3")) expect_error(var_label(df) <- list(x = "xx", z = "zz")) expect_error( df %>% set_variable_labels(.labels = list(x = "xx", z = "zz")) ) expect_error( df %>% set_variable_labels(x = "ghj", z = "ggg") ) # no error if .strict = FALSE expect_error( df %>% set_variable_labels(.labels = list(x = "xx", z = "zz"), .strict = FALSE), NA ) expect_error( df %>% set_variable_labels(x = "ghj", z = "ggg", .strict = FALSE), NA ) }) test_that("var_label preserved data.frame type", { tb <- dplyr::tibble(x = 1:3, y = c("a", "b", "c")) before <- class(tb) var_label(tb$x) <- "var x" var_label(tb) <- list(y = "YY", x = "XX") after <- class(tb) expect_equal(before, after) }) # labelled -------------------------------------------------------------- test_that("labelled return an object of class haven_labelled", { x <- labelled(c(1, 2, 3), c(yes = 1, maybe = 2, no = 3)) expect_true(is.labelled(x)) expect_s3_class(x, "haven_labelled") }) test_that("x must be numeric or character", { expect_error(labelled(TRUE)) }) test_that("x and labels must be compatible", { expect_error(labelled(1, "a")) expect_error(labelled(1, c(female = 2L, male = 1L)), NA) expect_error(labelled(1L, c(female = 2, male = 1)), NA) }) test_that("labels must have names", { expect_error(labelled(1, 1)) }) # val_labels and val_label ------------------------------------------------ test_that("val_labels preserves variable label", { x <- 1:3 var_label(x) <- "test" val_labels(x) <- c(yes = 1, no = 2) expect_equal(attr(x, "label", exact = TRUE), "test") val_labels(x) <- NULL expect_equal(attr(x, "label", exact = TRUE), "test") }) test_that("val_label preserves variable label", { x <- 1:3 var_label(x) <- "test" val_label(x, 1) <- "yes" expect_equal(attr(x, "label", exact = TRUE), "test") val_label(x, 1) <- NULL expect_equal(attr(x, "label", exact = TRUE), "test") }) test_that("val_labels and val_label preserves spss missing values", { x <- labelled_spss( 1:10, c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) val_labels(x) <- c(yes = 1, no = 3) val_label(x, 2) <- "maybe" expect_true(inherits(x, "haven_labelled")) expect_true(inherits(x, "haven_labelled_spss")) expect_equal(attr(x, "na_values"), c(9, 10)) expect_equal(attr(x, "na_range"), c(11, Inf)) val_label(x, 2) <- "maybe" expect_true(inherits(x, "haven_labelled")) expect_true(inherits(x, "haven_labelled_spss")) expect_equal(attr(x, "na_values"), c(9, 10)) expect_equal(attr(x, "na_range"), c(11, Inf)) expect_equal(attr(x, "labels", exact = TRUE), c(yes = 1, no = 3, maybe = 2)) }) test_that("value labels can be removed if missing values are defined", { x <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) val_labels(x) <- NULL expect_null(val_labels(x)) x <- labelled_spss(1:10, c(Good = 1), na_range = c(9, 20)) val_labels(x) <- NULL expect_null(val_labels(x)) }) test_that("value labels to NULL remove class if na_Values et na_range are NULL", { # nolint x <- labelled_spss(1:10, c(Good = 1, Bad = 8)) val_labels(x) <- NULL expect_null(val_labels(x)) expect_equal(match("labelled", names(attributes(x)), nomatch = 0), 0) }) test_that("error with non character argument", { x <- 1 expect_error(var_label(x) <- 1) }) test_that("error with mutilple character argument", { x <- 1 expect_error(var_label(x) <- c("a", "b")) }) test_that("test if unlist argument works properly", { df <- data.frame(col1 = 1:2, col2 = 3:4, stringsAsFactors = FALSE) expect_equal(var_label(df, unlist = TRUE), c(col1 = "", col2 = "")) var_label(df) <- c("lb1", "lb2") expect_equal(var_label(df, unlist = TRUE), c(col1 = "lb1", col2 = "lb2")) }) test_that("val_labels prefixed argument 100%", { v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) vlv <- val_labels(v) vlvp <- val_labels(v, prefixed = TRUE) noms_vlvp <- names(vlvp) pos <- regexpr("] ", noms_vlvp) noms_vlvp <- substring(noms_vlvp, pos + 2) names(vlvp) <- noms_vlvp expect_equal(vlv, vlvp) }) test_that("val_labels works for dataframe", { v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) y <- 1:10 df <- data.frame(v = v, y = y, stringsAsFactors = FALSE) res <- list(v = val_labels(v), y = NULL) expect_equal(val_labels(df), res) }) test_that(" 'val_labels <-' works for dataframe", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) num <- 1:10 ch <- letters[1:10] fac <- factor(paste0("f", 1:10)) df <- data.frame( xhs = xhs, num = num, ch = ch, fac = fac, stringsAsFactors = FALSE ) expect_error(val_labels(df) <- c(one = 1)) valeurs <- list( xhs = c(two = 2, five = 5), ch = c(leter_a = "a"), num = c(two = 2), fac = c(three = factor(2)) ) vldf <- df expect_error(val_labels(vldf) <- valeurs) valeurs <- list( xhs = c(two = 2, five = 5), ch = c(leter_a = "a"), num = c(two = 2) ) vldf <- df expect_error(val_labels(vldf) <- valeurs, NA) expect_null(val_labels(vldf)$fac) expect_equal(df$fac, vldf$fac) noms <- c("xhs", "num", "ch") expect_equal(val_labels(vldf)[noms], valeurs[noms]) val_labels(df) <- NULL expect_true(all(sapply(val_labels(df), is.null))) }) test_that("val_label works for haven_labelled", { v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) expect_equal(val_label(v, 2), NULL) expect_equal(val_label(v, 1), "yes") expect_equal(val_label(v, 1, prefixed = TRUE), "[1] yes") expect_error(val_label(v, 1:2)) }) test_that("val_label works for default", { num <- 1:3 ch <- letters[1:3] expect_equal(val_label(num, 2), NULL) expect_error(val_lable(num, 1:2)) expect_equal(val_label(ch, 1, prefixed = TRUE), NULL) expect_error(val_label(ch, 1:2)) }) test_that("val_label works for for dataframe", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) xh <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) num <- 1:10 df <- data.frame(xhs = xhs, num = num, xh = xh, stringsAsFactors = FALSE) expect_true(all(sapply(val_label(df, 2), is.null))) expect_equal( val_label(df, 1), list(xhs = "Good", num = NULL, xh = "yes") ) expect_equal( val_label(df, 3, prefixed = TRUE), list(xhs = NULL, num = NULL, xh = "[3] no") ) expect_error(val_lable(df, 1:2)) }) test_that(" 'val_label<-' works properly", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) xh <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) num <- 1:10 ch <- letters[1:10] expect_error(val_label(num, "a") <- "a") expect_error(val_label(xh, 12) <- c("one", "two")) expect_error(val_label(xhs, c(12, 13)) <- "twenty_two") df <- data.frame( xhs = xhs, num = num, xh = xh, ch = ch, stringsAsFactors = FALSE ) expect_error(val_label(df, 2) <- 2) expect_error(val_label(df, 2) <- two) expect_error(val_label(df, 2) <- c("a", "b")) expect_error(val_label(df, 2:3) <- "a") sub_df <- df[, -match("ch", names(df))] }) test_that(" 'val_label<-.data.frame' works properly", { xhs <- labelled_spss( c(1:3, NA, 5:10), c(Good = 1, Bad = 8), na_values = c(9, 10), na_range = c(11, Inf) ) num <- 1:10 ch <- letters[1:10] df <- data.frame(xhs = xhs, num = num, ch = ch, stringsAsFactors = FALSE) valeurs <- list(xhs = "2", ch = "letter_a", num = "two") df_c <- df expect_error(val_label(df_c, 2) <- valeurs) expect_error(val_label(df_c, "a") <- valeurs) val_label(df_c, 2) <- valeurs[-2] val_label(df_c, "a") <- valeurs[2] res_labels <- list( xhs = c(Good = 1, Bad = 8, "2" = 2), num = c(two = 2), ch = c(letter_a = "a") ) expect_equal(val_labels(df_c), res_labels) }) # remove_labels -------------------------------------------------------------- test_that("remove_label works correctly", { x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" expect_false(inherits(remove_labels(x), "haven_labelled")) expect_null(var_label(remove_labels(x))) expect_equal( var_label(remove_labels(x, keep_var_label = TRUE)), var_label(x) ) }) test_that("remove_labels strips labelled attributes", { var <- labelled(c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L)) exp <- c(1L, 98L, 99L) expect_equal(remove_labels(var), exp) }) test_that("remove_labels returns variables not of class('labelled') unmodified", { #nolint var <- c(1L, 98L, 99L) expect_equal(remove_labels(var), var) }) test_that("remove_labels works with data.frame", { var <- labelled(c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L)) exp <- c(1L, 98L, 99L) df <- data.frame(var = var, exp = exp, stringsAsFactors = FALSE) rmdf <- remove_labels(df) expect_equal(rmdf$exp, exp) expect_equal(rmdf$var, exp) }) test_that("remove_labels works with labelled_spss", { xhs <- haven::labelled_spss( c(1, 2, 3, NA, 99), c(t1 = 1, t2 = 2, Missing = 99), na_value = 99, na_range = c(99, Inf), label = "A test variable" ) expect_null(var_label(remove_labels(xhs))) expect_false(identical(var_label(remove_labels(xhs)), var_label(xhs))) expect_null(val_labels(remove_labels(xhs))) }) # remove_val_labels ------------------------------------------------------------ test_that("remove_labels works properly", { var <- labelled( c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L), label = "A variable label" ) exp <- c(1L, 98L, 99L) df <- data.frame(var = var, exp = exp, stringsAsFactors = FALSE) rmdf <- remove_val_labels(df) expect_null(val_labels(rmdf$var)) expect_false(identical(rmdf$var, exp)) expect_equal(rmdf$exp, exp) }) # remove_var_label ------------------------------------------------------------ test_that("remove_labels works properly", { var <- labelled( c(1L, 98L, 99L), c(not_answered = 98L, not_applicable = 99L), label = "A variable label" ) exp <- c(1L, 98L, 99L) df <- data.frame(var = var, exp = exp, stringsAsFactors = FALSE) rmdf <- remove_var_label(df) expect_null(var_label(rmdf$var)) expect_false(identical(rmdf$var, exp)) expect_equal(val_labels(rmdf$var), val_labels(var)) expect_equal(rmdf$exp, exp) }) # sort_val_labels --------------------------------------------------------- test_that("sort_val_labels works properly", { df <- data.frame( lab = labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3)), num = c(3, 1, 2), stringsAsFactors = FALSE ) sdf <- sort_val_labels(df) expect_equal( val_labels(sdf), list(lab = c(yes = 1, maybe = 2, no = 3), num = NULL) ) sdf <- sort_val_labels(df, decreasing = TRUE) expect_equal( val_labels(sdf), list(lab = c(no = 3, maybe = 2, yes = 1), num = NULL) ) sdf <- sort_val_labels(df, "l") expect_equal( val_labels(sdf), list(lab = c(maybe = 2, no = 3, yes = 1), num = NULL) ) sdf <- sort_val_labels(df, "l", TRUE) expect_equal( val_labels(sdf), list(lab = c(yes = 1, no = 3, maybe = 2), num = NULL) ) x <- c(2, tagged_na("z"), 1, tagged_na("a")) val_labels(x) <- c(no = 2, refused = tagged_na("z"), yes = 1, dk = tagged_na("a")) expect_equivalent( sort_val_labels(x, according_to = "v") %>% val_labels() %>% format_tagged_na() %>% trimws(), c("1", "2", "NA(a)", "NA(z)") ) expect_equivalent( sort_val_labels(x, according_to = "l") %>% val_labels() %>% names(), c("dk", "no", "refused", "yes") ) }) # remove_user_na -------------------------------------------------------------- test_that("remove_user_na works properly", { var <- labelled( c(1L, 2L, NA, 98L, 99L), c(not_answered = 98L, not_applicable = 99L), label = "A variable label" ) exp <- c(1L, 2L, NA, 98L, 99L) xhs <- haven::labelled_spss( c(1, 2, NA, 98, 99), c(t1 = 1, t2 = 2, Missing = 99), na_value = 99, na_range = c(99, Inf), label = "A test variable" ) df <- data.frame(var = var, exp = exp, xhs = xhs, stringsAsFactors = FALSE) rmtdf <- remove_user_na(df, user_na_to_na = TRUE) expect_equal(rmtdf$var, var) expect_equal(rmtdf$exp, exp) expect_null(na_values(rmtdf$xhs)) expect_equal(rmtdf$exp, exp) rmfdf <- remove_user_na(df, user_na_to_na = FALSE) expect_false(is.null(var_label(rmfdf$var))) rmfdf <- remove_user_na(df, user_na_to_tagged_na = TRUE) expect_equal( na_tag(rmfdf$xhs), c(NA, NA, NA, NA, "a") ) x <- labelled_spss(1:100, na_range = c(50, 100)) expect_warning(remove_user_na(x, user_na_to_tagged_na = TRUE)) }) # to_factor -------------------------------------------------------------------- test_that("to_factor preserves variable label", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) var_label(x) <- "yes/no" expect_equal(var_label(to_factor(x)), var_label(x)) }) test_that("strict option of to_factor works correctly", { v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) expect_s3_class(to_factor(v, strict = FALSE), "factor") expect_s3_class(to_factor(v, strict = TRUE), "haven_labelled") expect_equal(class(to_factor(v, strict = TRUE, unclass = TRUE)), "numeric") }) test_that("to_factor works on data.frame", { df <- data.frame( x = labelled(c(1, 1, 2), c(yes = 1, no = 2)), y = c("a", "a", "b"), z = 1:3, stringsAsFactors = FALSE ) df2 <- to_factor(df) expect_true(is.factor(df2$x)) expect_equal(class(df2$y), class(df$y)) expect_equal(class(df2$z), class(df$z)) df3 <- to_factor(df, labelled_only = FALSE) expect_true(is.factor(df3$y)) expect_true(is.factor(df3$z)) }) test_that("to_factor does not change a factor", { x <- factor(1:2) expect_equal(to_factor(x), x) }) test_that("to_factor keeps labels", { x <- 1:2 lab_name <- "vector" var_label(x) <- lab_name expect_equal(var_label(to_factor(x)), lab_name) }) test_that("to_factor boolean parameters", { x1 <- haven::labelled_spss( c(1, 2, 3, 5, 4, NA, 99), c(t1 = 1, t2 = 2, t5 = 5, Missing = 99), na_value = 99 ) tfx <- to_factor(x1, user_na_to_na = TRUE) expect_equal(which(is.na(tfx)), 6:7) expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5")) tfx <- to_factor(x1, nolabel_to_na = TRUE) expect_equal(which(is.na(tfx)), c(3, 5, 6)) expect_equal(levels(tfx), c("t1", "t2", "t5", "Missing")) tfx <- to_factor(x1[1:3], drop_unused_labels = FALSE) expect_equal(levels(tfx), c("t1", "t2", "3", "t5", "Missing")) tfx <- to_factor(x1[1:3], drop_unused_labels = TRUE) expect_equal(levels(tfx), c("t1", "t2", "3")) }) test_that("to_factor parameters : sort_levels + levels", { x1 <- haven::labelled_spss( c(1, 2, 3, 5, 4, NA, 99), c(t1 = 1, t2 = 2, t5 = 5, Missing = 99), na_value = 99 ) tfx <- to_factor(x1, sort_levels = "auto") expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5", "Missing")) tfx <- to_factor(x1, sort_levels = "none") expect_equal(levels(tfx), c("t1", "t2", "t5", "Missing", "3", "4")) tfx <- to_factor(x1, sort_levels = "labels") expect_equal(levels(tfx), c("3", "4", "Missing", "t1", "t2", "t5")) tfx <- to_factor(x1, sort_levels = "values") expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5", "Missing")) tfx <- to_factor(x1, levels = "labels") expect_equal(levels(tfx), c("t1", "t2", "3", "4", "t5", "Missing")) tfx <- to_factor(x1, levels = "values") expect_equal(levels(tfx), c("1", "2", "3", "4", "5", "99")) tfx <- to_factor(x1, levels = "prefixed") expect_equal( levels(tfx), c("[1] t1", "[2] t2", "[3] 3", "[4] 4", "[5] t5", "[99] Missing") ) }) test_that("to_factor() and tagged NAs", { x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) val_labels(x) <- c( yes = 1, no = 2, missing = tagged_na("a"), toto = NA ) expect_equal( to_factor(x), structure(c(1L, 2L, NA, 1L, NA, 2L, NA, NA), .Label = c("yes", "no"), class = "factor") ) expect_equal( to_factor(x, explicit_tagged_na = TRUE), structure(c(1L, 2L, 4L, 1L, 5L, 2L, 4L, 3L), .Label = c("yes", "no", "toto", "missing", "NA(z)"), class = "factor") ) }) # to_character ----------------------------------------------------------------- test_that("to_character produce an appropriate character vector", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) expect_equal(class(to_character(x)), "character") expect_equal(to_character(x), c("yes", "yes", "no")) }) test_that("to_character preserves variable label", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) var_label(x) <- "yes/no" expect_equal(var_label(to_character(x)), var_label(x)) }) test_that("to_character produce an appropriate character vector", { x <- labelled(c(1, 1, 2), c(yes = 1, no = 2)) expect_equal(class(to_character(x)), "character") expect_equal(to_character(x), c("yes", "yes", "no")) }) test_that("to_character default (100%)", { x <- 1:3 expect_equal(class(to_character(x)), "character") expect_equal(to_character(x), as.character(x)) }) test_that("to_character.double and explicit_tagged_na", { x <- c(1:3, tagged_na("a"), tagged_na("z")) expect_equal( to_character(x), c("1", "2", "3", NA, NA) ) expect_equal( to_character(x, explicit_tagged_na = TRUE), c("1", "2", "3", "NA(a)", "NA(z)") ) }) # set_value_labels and add_value_labels --------------------------------------- test_that("set_value_labels replaces all value labels", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_value_labels( df, s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2) ) expect_equal(val_labels(df$s1), c(Male = "M", Female = "F")) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) df <- set_value_labels(df, s2 = c(Yes = 1, Unknown = 9)) expect_equal(val_labels(df$s2), c(Yes = 1, Unknown = 9)) v <- set_value_labels(1:10, c(low = 1, high = 10)) expect_equal(val_labels(v), c(low = 1, high = 10)) v <- set_value_labels(1:10, low = 1, high = 10) expect_equal(val_labels(v), c(low = 1, high = 10)) v <- set_value_labels(1:10, .labels = c(low = 1, high = 10)) expect_equal(val_labels(v), c(low = 1, high = 10)) v <- set_value_labels(v, NULL) expect_null(val_labels(v)) }) test_that("set_value_labels errors", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) expect_error( df %>% set_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) expect_error( df %>% set_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) ) # no error if .strict = FALSE expect_error( df %>% set_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2), .strict = FALSE ), NA ) expect_error( df %>% set_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ), .strict = FALSE ), NA ) }) test_that("add_value_labels errors", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) expect_error( df %>% add_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) expect_error( df %>% add_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ) ) ) # no error if .strict = FALSE expect_error( df %>% add_value_labels( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2), .strict = FALSE ), NA ) expect_error( df %>% add_value_labels( .labels = list( s1 = c(Male = "M", Female = "F"), s3 = c(Yes = 1, No = 2) ), .strict = FALSE ), NA ) expect_error(add_value_labels(df, s1 = c("F", Male = "M"))) }) test_that("add_value_labels and remove_value_labels updates the list of value labels", { # nolint df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_value_labels( df, s1 = c(Male = "M", Female = "F"), s2 = c(Yesss = 1, No = 2) ) df <- add_value_labels(df, s2 = c(Yes = 1, Unknown = 9)) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2, Unknown = 9)) df <- remove_value_labels(df, s2 = 9) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) expect_error(remove_value_labels(df, 9)) v <- set_value_labels(1:10, low = 1, high = 10) v <- add_value_labels(v, middle = 5) v <- remove_value_labels(v, 10) expect_equal(val_labels(v), c(low = 1, middle = 5)) }) # set_variable_labels -------------------------------------------------------- test_that("set_variable_labels updates variable labels", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_variable_labels(df, s1 = "Sex", s2 = "Question") expect_equal(var_label(df$s1), "Sex") df <- set_variable_labels(df, s2 = NULL) expect_null(var_label(df$s2)) }) # missing values -------------------------------------------------------------- test_that("it is possible to define missing values if no value labels were defined", {# nolint x <- c(1, 2, 2, 9) na_values(x) <- 9 expect_equal(na_values(x), 9) x <- c(1, 2, 2, 9) na_range(x) <- 9:10 expect_equal(na_range(x), 9:10) }) test_that("na_values and na_range keep variable label", { vl <- "variable label" x <- 1:9 var_label(x) <- vl na_values(x) <- 8 na_range(x) <- c(9, Inf) expect_equal(var_label(x), vl) }) # recode (dplyr) --------------------------------------------------------------- test_that("dplyr::recode could be applied to numeric labelled vector", { x <- dplyr::recode(labelled(1:3, c(yes = 1, no = 2)), `3` = 2L) expect_equal(x, labelled(c(1L, 2L, 2L), c(yes = 1, no = 2))) }) test_that("dplyr::recode could be applied to character labelled vector", { x <- dplyr::recode( labelled(c("a", "b", "c"), c(yes = "a", no = "b")), c = "b" ) expect_equal(x, labelled(c("a", "b", "b"), c(yes = "a", no = "b"))) }) # update_labelled ---------------------------------------- test_that("update_labelled update previous haven's labelled objects but not Hmisc's labelled objects", { # nolint vhaven <- structure( 1:4, label = "label", labels = c(No = 1, Yes = 2), class = "labelled" ) vHmisc <- structure(1:4, label = "label", class = "labelled") expect_s3_class(update_labelled(vhaven), "haven_labelled") expect_s3_class(update_labelled(vHmisc), "labelled") df <- dplyr::tibble(vhaven, vHmisc) expect_s3_class(update_labelled(df)$vhaven, "haven_labelled") expect_s3_class(update_labelled(df)$vHmisc, "labelled") }) test_that("update_labelled update to haven_labelled_spss if there are na values", { #nolint v1 <- structure(1:4, label = "label", labels = c(No = 1, Yes = 2), na_values = c(8, 9), class = c("labelled_spss", "labelled") ) v2 <- structure(1:4, label = "label", labels = c(No = 1, Yes = 2), na_range = c(8, 9), class = c("labelled_spss", "labelled") ) expect_s3_class(update_labelled(v1), "haven_labelled_spss") expect_s3_class(update_labelled(v1), "haven_labelled_spss") }) test_that("update_labelled preserve variable and value labels", { v <- structure( 1:4, label = "variable label", labels = c(No = 1, Yes = 2), class = "labelled" ) expect_equal(var_label(update_labelled(v)), "variable label") expect_equal(val_labels(update_labelled(v)), c(No = 1, Yes = 2)) }) test_that("update_labelled do nothing if it's not a labelled vector", { x <- 1:10 expect_equal(update_labelled(x), x) }) test_that("update_labelled works with labelled from haven 2.0", { data(x_haven_2.0) x <- labelled(c(1, 2, 1, 2, 10, 9), c(Unknown = 9, Refused = 10)) expect_false(identical(x, x_haven_2.0)) up_x_haven_2.0 <- update_labelled(x_haven_2.0) expect_equal(x, up_x_haven_2.0) data(x_spss_haven_2.0) x2 <- labelled_spss( 1:10, c(Good = 1, Bad = 8), na_range = c(9, Inf), label = "Quality rating" ) expect_false(identical(x2, x_spss_haven_2.0)) up_x_spss_haven_2.0 <- update_labelled(x_spss_haven_2.0) expect_equal(x2, up_x_spss_haven_2.0) }) # remove_attributes ------------------------------------------------------------ test_that("remove_attributes does not transform characters into factors", { d <- data.frame( ch = structure(letters[1:2], some_attribute = TRUE), stringsAsFactors = FALSE ) d <- remove_attributes(d, "some_attribute") expect_true(is.character(d$ch)) }) # unlabelled ------------------------------------------------------------------ test_that("unlabelled works correctly", { df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), stringsAsFactors = FALSE ) df <- unlabelled(df) expect_equal(class(df$a), "numeric") expect_s3_class(df$b, "factor") expect_equal(class(df$c), "character") v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)) expect_s3_class(unlabelled(v), "factor") v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) expect_false(inherits(unlabelled(v), "haven_labelled")) expect_false(is.factor(unlabelled(1:4))) }) # remove_label ------------------------------------------ test_that("remove_label works correctly", { x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" expect_false(inherits(remove_labels(x), "haven_labelled")) expect_null(var_label(remove_labels(x))) expect_equal( var_label(remove_labels(x, keep_var_label = TRUE)), var_label(x) ) }) # recode -------------------------------------------------------------- test_that("dplyr::recode works properly with labelled vectors", { x <- labelled(1:3, c(yes = 1, no = 2)) r <- dplyr::recode(x, `3` = 2L) expect_equal(r, labelled(c(1L, 2L, 2L), val_labels(x))) r <- dplyr::recode(x, `3` = 2L, .keep_value_labels = FALSE) expect_equal(r, c(1L, 2L, 2L)) expect_warning(dplyr::recode(x, `3` = "a", .default = "b")) x <- labelled(1:4, c(a = 1, b = 2, c = 3, d = 4)) r <- dplyr::recode( x, `1` = 1L, `2` = 1L, `3` = 2L, `4` = 2L, .combine_value_labels = TRUE ) expect_equal(val_labels(r), c("a / b" = 1L, "c / d" = 2L)) r <- dplyr::recode(x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) expect_equal(val_labels(r), c("a / b" = 1L, "c / d" = 3L)) r <- dplyr::recode( x, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE, .sep = " or " ) expect_equal(val_labels(r), c("a or b" = 1L, "c or d" = 3L)) y <- labelled(1:4, c(a = 1)) r <- dplyr::recode(y, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) expect_equal(val_labels(r), c(a = 1L)) }) # tidy dots -------------------------------------------------------------- test_that("functions with dots accept tidy evaluation (`!!!` operator)", { df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) variable_list <- list(s1 = "Sex", s2 = "Question") df <- set_variable_labels(df, !!!variable_list) expect_equal(var_label(df$s1), "Sex") expect_equal(var_label(df$s2), "Question") df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) labels_list <- list( s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2) ) df <- set_value_labels(df, !!!labels_list) expect_equal(val_labels(df$s1), c(Male = "M", Female = "F")) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) df <- data.frame( s1 = c("M", "M", "F"), s2 = c(1, 1, 2), stringsAsFactors = FALSE ) df <- set_value_labels( df, s1 = c(Male = "M", Female = "F"), s2 = c(Yesss = 1, No = 2) ) added_values_list <- list(s2 = c(Yes = 1, Unknown = 9)) df <- add_value_labels(df, !!!added_values_list) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2, Unknown = 9)) removed_values_list <- list(s2 = 9) df <- remove_value_labels(df, !!!removed_values_list) expect_equal(val_labels(df$s2), c(Yes = 1, No = 2)) }) # drop_unused_value_labels ------------------------------------------------ test_that("drop_unused_value_labels works properly with data.frame", { x <- labelled(c(1, 2, 2, 1), c(yes = 1, no = 2, maybe = 3)) y <- 1:4 df <- data.frame(x = x, y = y, stringsAsFactors = FALSE) ddf <- drop_unused_value_labels(df) expect_false(identical(ddf$x, x)) expect_equal(ddf$y, y) expect_false(identical(val_labels(ddf$x), val_labels(x))) expect_equal(val_labels(ddf$x), val_labels(x)[-3]) }) # nolabel_to_na ----------------------------------------------------------- test_that("nolabel_to_na works properly", { x <- labelled(c(1, 2, 9, 1, 9), c(yes = 1, no = 2)) y <- 1:5 df <- data.frame(x = x, y = y, stringsAsFactors = FALSE) nldf <- nolabel_to_na(df) expect_false(identical(nldf$x, x)) expect_equal(nldf$y, y) expect_equal(which(is.na(nldf$x)), c(3L, 5L)) }) # val_labels_to_na ----------------------------------------------------------- test_that("val_labels_to_na works properly", { x <- labelled(c(1, 2, 9, 1, 9), c(dk = 9)) y <- 1:5 df <- data.frame(x = x, y = y, stringsAsFactors = FALSE) vldf <- val_labels_to_na(df) expect_false(identical(vldf$x, x)) expect_equal(vldf$y, y) expect_null(val_labels(vldf$x)) expect_equal(which(is.na(vldf$x)), c(3L, 5L)) }) # names_prefixed_by_values ------------------------------------------------ test_that("names_prefixed_by_values works properly", { df <- dplyr::tibble( c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")), c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2)), ) res_names_prefixed <- list( c1 = c("[M] Male", "[F] Female"), c2 = c("[1] Yes", "[2] No") ) expect_equivalent( names_prefixed_by_values(val_labels(df)), res_names_prefixed ) expect_true(is.null(names_prefixed_by_values(NULL))) }) test_that("null_action in var_label() works as expected", { df <- datasets::iris %>% set_variable_labels( Petal.Length = "length of petal", Petal.Width = "width of petal" ) expect_equal( var_label(df), list( Sepal.Length = NULL, Sepal.Width = NULL, Petal.Length = "length of petal", Petal.Width = "width of petal", Species = NULL ) ) expect_equal( var_label(df, null_action = "fi"), list( Sepal.Length = "Sepal.Length", Sepal.Width = "Sepal.Width", Petal.Length = "length of petal", Petal.Width = "width of petal", Species = "Species" ) ) expect_equal( var_label(df, null_action = "skip"), list( Petal.Length = "length of petal", Petal.Width = "width of petal" ) ) expect_error(var_label(df$Species, null_action = "skip")) }) test_that("var_label works with packed columns", { d <- iris %>% tidyr::as_tibble() %>% tidyr::pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") expect_equal( label_attribute(d$Sepal), "Label of the Sepal df-column" ) d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) expect_equal( label_attribute(d$Petal$Length), "Petal length" ) expect_equal( length(var_label(d)), 3L ) expect_equal( length(var_label(d, recurse = TRUE)), 3L ) expect_equal( length(var_label(d, recurse = TRUE, unlist = TRUE)), 5L ) }) labelled/tests/testthat/test-recode_if.r0000644000176200001440000000277714357761455020135 0ustar liggesuserscontext("Test recode_if()") test_that("recode_if() works as expected", { x <- labelled(c(1, 2, 2, 9), c(yes = 1, no = 2)) y <- x %>% recode_if(x == 9, NA) expect_equal( y, labelled(c(1, 2, 2, NA), c(yes = 1, no = 2)) ) y <- x %>% recode_if(1:4 < 3, 11:14) expect_equal( y, labelled(c(11, 12, 2, 9), c(yes = 1, no = 2)) ) x <- c("A", "B", "C") expect_equal( x %>% recode_if(c(TRUE, FALSE, NA), "Z"), c("Z", "B", "C") ) expect_equal( x %>% recode_if(c(TRUE, FALSE, NA), 0), c("0", "B", "C") ) }) test_that("recode_if() preserve value and variable labels", { x <- labelled_spss(c(1, 2, 2, 8, 9), c(yes = 1, no = 2), na_values = 9) var_label(x) <- "variable label" y <- x %>% recode_if(unclass(x) == 8, NA) expect_equal(var_label(x), var_label(y)) expect_equal(val_labels(x), val_labels(y)) expect_equal(na_values(x), na_values(y)) expect_equal(na_range(x), na_range(y)) }) test_that("recode_if() checks", { expect_error( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 9L), NA ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 11:13), NA ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE, "NA"), 9) ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE), 9) ) expect_error( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 10:11) ) expect_warning( 1:3 %>% recode_if(c(TRUE, FALSE, NA), "char") ) expect_warning( 1:3 %>% recode_if(c(TRUE, FALSE, NA), 9) ) }) labelled/tests/testthat.R0000644000176200001440000000007614357761455015167 0ustar liggesuserslibrary(testthat) library(labelled) test_check("labelled") labelled/vignettes/0000755000176200001440000000000014444575742014046 5ustar liggesuserslabelled/vignettes/intro_labelled.Rmd0000644000176200001440000003663414357761455017506 0ustar liggesusers--- author: "Joseph Larmarange" title: "Introduction to labelled" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Introduction to labelled} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The purpose of the **labelled** package is to provide functions to manipulate metadata as variable labels, value labels and defined missing values using the `haven_labelled` and `haven_labelled_spss` classes introduced in `haven` package. These classes allow to add metadata (variable, value labels and SPSS-style missing values) to vectors. It should be noted that **value labels** doesn't imply that your vectors should be considered as categorical or continuous. Therefore, value labels are not intended to be use for data analysis. For example, before performing modeling, you should convert vectors with value labels into factors or into classic numeric/character vectors. Therefore, two main approaches could be considered. ![Two main approaches](approaches.png){width=100%} In **approach A**, `haven_labelled` vectors are converted into factors or into numeric/character vectors just after data import, using `unlabelled()`, `to_factor()` or `unclass()`. Then, data cleaning, recoding and analysis are performed using classic **R** vector types. In **approach B**, `haven_labelled` vectors are kept for data cleaning and coding, allowing to preserved original recoding, in particular if data should be reexported after that step. Functions provided by `labelled` will be useful for managing value labels. However, as in approach A, `haven_labelled` vectors will have to be converted into classic factors or numeric vectors before data analysis (in particular modeling) as this is the way categorical and continuous variables should be coded for analysis functions. ## Variable labels A variable label could be specified for any vector using `var_label()`. ```{r} library(labelled) var_label(iris$Sepal.Length) <- "Length of sepal" ``` It's possible to add a variable label to several columns of a data frame using a named list. ```{r} var_label(iris) <- list( Petal.Length = "Length of petal", Petal.Width = "Width of Petal" ) ``` To get the variable label, simply call `var_label()`. ```{r} var_label(iris$Petal.Width) var_label(iris) ``` To remove a variable label, use `NULL`. ```{r} var_label(iris$Sepal.Length) <- NULL ``` In **RStudio**, variable labels will be displayed in data viewer. ```{r, eval=FALSE} View(iris) ``` You can display and search through variable names and labels with `look_for()`: ```{r} look_for(iris) look_for(iris, "pet") look_for(iris, details = FALSE) ``` ## Value labels The first way to create a labelled vector is to use the `labelled()` function. It's not mandatory to provide a label for each value observed in your vector. You can also provide a label for values not observed. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v ``` Use `val_labels()` to get all value labels and `val_label()` to get the value label associated with a specific value. ```{r} val_labels(v) val_label(v, 8) ``` `val_labels()` could also be used to modify all the value labels attached to a vector, while `val_label()` will update only one specific value label. ```{r} val_labels(v) <- c(yes = 1, nno = 3, bug = 5) v val_label(v, 3) <- "no" v ``` With `val_label()`, you can also add or remove specific value labels. ```{r} val_label(v, 2) <- "maybe" val_label(v, 5) <- NULL v ``` To remove all value labels, use `val_labels()` and `NULL`. The `haven_labelled` class will also be removed. ```{r} val_labels(v) <- NULL v ``` Adding a value label to a non labelled vector will apply `haven_labelled` class to it. ```{r} val_label(v, 1) <- "yes" v ``` Note that applying `val_labels()` to a factor will generate an error! ```{r, error = TRUE} f <- factor(1:3) f val_labels(f) <- c(yes = 1, no = 3) ``` You could also apply `val_labels()` to several columns of a data frame. ```{r} df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1) val_label(df, 1) <- "yes" val_label(df[, c("v1", "v3")], 2) <- "maybe" val_label(df[, c("v2", "v3")], 3) <- "no" val_labels(df) val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3) val_labels(df) val_labels(df) <- NULL val_labels(df) val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3)) val_labels(df) ``` ## Sorting value labels Value labels are sorted by default in the order they have been created. ```{r} v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA) val_label(v, 1) <- "yes" val_label(v, 3) <- "no" val_label(v, 9) <- "refused" val_label(v, 2) <- "maybe" val_label(v, 8) <- "don't know" v ``` It could be useful to reorder the value labels according to their attached values, with `sort_val_labels()`. ```{r} sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) ``` If you prefer, you can also sort them according to the labels. ```{r} sort_val_labels(v, according_to = "l") ``` ## User defined missing values (SPSS's style) `haven` (>= 2.0.0) introduced an additional `haven_labelled_spss` class to deal with user defined missing values. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. It is possible to manipulate this missing values with `na_values()` and `na_range()`. Note that `is.na()` will return `TRUE` as well for user-defined missing values. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` Since version 2.1.0, it is not mandatory to define at least one value label before defining missing values. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 x ``` To convert user defined missing values into `NA`, simply use `user_na_to_na()`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- user_na_to_na(v) v2 ``` You can also remove user missing values definition without converting these values to `NA`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- remove_user_na(v) v2 ``` or ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v na_values(v) <- NULL v ``` ## Other conversion to NA In some cases, values who don't have an attached value label could be considered as missing. `nolabel_to_na()` will convert them to `NA`. ```{r} v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3)) v nolabel_to_na(v) ``` In other cases, a value label is attached only to specific values that corresponds to a missing value. For example: ```{r} size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99)) size ``` In such cases, `val_labels_to_na()` could be appropriate. ```{r} val_labels_to_na(size) ``` These two functions could also be applied to an overall data frame. Only labelled vectors will be impacted. ## Converting to factor A labelled vector could easily be converted to a factor with `to_factor()`. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v to_factor(v) ``` The `levels` argument allows to specify what should be used as the factor levels, i.e. the labels (default), the values or the labels prefixed with values. ```{r} to_factor(v, levels = "v") to_factor(v, levels = "p") ``` The `ordered` argument will create an ordinal factor. ```{r} to_factor(v, ordered = TRUE) ``` The argument `nolabel_to_na` specify if the corresponding function should be applied before converting to a factor. Therefore, the two following commands are equivalent. ```{r} to_factor(v, nolabel_to_na = TRUE) to_factor(nolabel_to_na(v)) ``` `sort_levels` specifies how the levels should be sorted: `"none"` to keep the order in which value labels have been defined, `"values"` to order the levels according to the values and `"labels"` according to the labels. `"auto"` (default) will be equivalent to `"none"` except if some values with no attached labels are found and are not dropped. In that case, `"values"` will be used. ```{r} to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "l") ``` The function `to_labelled()` could be used to turn a factor into a labelled numeric vector. ```{r} f <- factor(1:3, labels = c("a", "b", "c")) to_labelled(f) ``` Note that `to_labelled(to_factor(v))` will not be equal to `v` due to the way factors are stored internally by **R**. ```{r} v to_labelled(to_factor(v)) ``` ## Other type of conversions You can use `to_character()` for converting into a character vector instead of a factor. ```{r} v to_character(v) ``` To remove the `haven_class`, you can simply use `unclass()`. ```{r} unclass(v) ``` Note that value labels will be preserved as an attribute to the vector. ```{r} remove_val_labels(v) ``` To remove value labels, use `remove_val_labels()`. ```{r} remove_val_labels(v) ``` Note that if your vector does have user-defined missing values, you may also want to use `remove_user_na()`. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" x remove_val_labels(x) remove_user_na(x) remove_user_na(x, user_na_to_na = TRUE) remove_val_labels(remove_user_na(x)) unclass(x) ``` You can remove all labels and user-defined missing values with `remove_labels()`. Use `keep_var_label = TRUE` to preserve only variable label. ```{r} remove_labels(x, user_na_to_na = TRUE) remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE) ``` ## Conditional conversion to factors{#unlabelled} For any analysis, it is the responsibility of user to identify which labelled numeric vectors should be considered as **categorical** (and therefore converted into factors using `to_factor()`) and which variables should be treated as **continuous** (and therefore unclassed into numeric using `base::unclass()`). It should be noted that most functions expect categorical variables to be coded as factors. It includes most modeling functions (such as `stats::lm()` or `stats::glm()`) or plotting functions from `ggplot2`. In most of cases, if data documentation was properly done, categorical variables corresponds to vectors where all observed values have a value label while vectors where only few values have a value label should be considered as continuous. In that situation, you could apply the `unlabelled()` method to an overall data frame. By default, `unlabelled()` works as follow: - if a column doesn't inherit the `haven_labelled` class, it will be not affected; - if all observed values have a corresponding value label, the column will be converted into a factor (using `to_factor()`); - otherwise, the column will be unclassed (and converted back to a numeric or character vector by applying `base::unclass()`). ```{r} df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)), d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), e = labelled_spss( c(1, 9, 1, 2), labels = c(No = 1, Yes = 2), na_values = 9 ) ) df %>% look_for() unlabelled(df) %>% look_for() unlabelled(df, user_na_to_na = TRUE) %>% look_for() unlabelled(df, drop_unused_labels = TRUE) %>% look_for() ``` ## Importing labelled data In **haven** package, `read_spss`, `read_stata` and `read_sas` are natively importing data using the `labelled` class and the `label` attribute for variable labels. Functions from **foreign** package could also import some metadata from **SPSS** and **Stata** files. `to_labelled` can convert data imported with **foreign** into a labelled data frame. However, there are some limitations compared to using **haven**: - For **SPSS** files, it will be better to set `use.value.labels = FALSE`, `to.data.frame = FALSE` and `use.missings = FALSE` when calling `read.spss`. If `use.value.labels = TRUE`, variable with value labels will be converted into factors by `read.spss` (and kept as factors by `foreign_to_label`). If `to.data.frame = TRUE`, meta data describing the missing values will not be imported. If `use.missings = TRUE`, missing values would have been converted to `NA` by `read.spss`. - For **Stata** files, set `convert.factors = FALSE` when calling `read.dta` to avoid conversion of variables with value labels into factors. So far, missing values defined in Stata are always imported as `NA` by `read.dta` and could not be retrieved by `foreign_to_labelled`. The **memisc** package provide functions to import variable metadata and store them in specific object of class `data.set`. The `to_labelled` method can convert a data.set into a labelled data frame. ```{r, eval=FALSE} # from foreign library(foreign) df <- to_labelled(read.spss( "file.sav", to.data.frame = FALSE, use.value.labels = FALSE, use.missings = FALSE )) df <- to_labelled(read.dta( "file.dta", convert.factors = FALSE )) # from memisc library(memisc) nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") nes1948 <- spss.portable.file(nes1948.por) df <- to_labelled(nes1948) ds <- as.data.set(nes19480) df <- to_labelled(ds) ``` ## Using labelled with dplyr/magrittr If you are using the `%>%` operator, you can use the functions `set_variable_labels()`, `set_value_labels()`, `add_value_labels()` and `remove_value_labels()`. ```{r} library(dplyr) df <- data_frame(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% set_variable_labels(s1 = "Sex", s2 = "Question") %>% set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2)) df$s2 ``` `set_value_labels()` will replace the list of value labels while `add_value_labels()` will update it. ```{r} df <- df %>% set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9)) df$s2 df <- df %>% add_value_labels(s2 = c(No = 2)) df$s2 ``` You can also remove some variable and/or value labels. ```{r} df <- df %>% set_variable_labels(s1 = NULL) # removing one value label df <- df %>% remove_value_labels(s2 = 2) df$s2 # removing several value labels df <- df %>% remove_value_labels(s2 = 8:9) df$s2 # removing all value labels df <- df %>% set_value_labels(s2 = NULL) df$s2 ``` To convert variables, the easiest is to use `unlabelled()`. ```{r} library(questionr) data(fertility) glimpse(women) glimpse(women %>% unlabelled()) ``` Alternatively, you can use functions as `dplyr::mutate_if()` or `dplyr::mutate_at()`. See the example below. ```{r} glimpse(to_factor(women)) glimpse(women %>% mutate_if(is.labelled, to_factor)) glimpse(women %>% mutate_at(vars(employed:religion), to_factor)) ``` labelled/vignettes/approaches.png0000644000176200001440000016372414357761455016717 0ustar liggesusersPNG  IHDRhtEXtmxfile%3Cmxfile%20host%3D%22Electron%22%20modified%3D%222020-04-28T11%3A17%3A05.579Z%22%20agent%3D%22Mozilla%2F5.0%20(Windows%20NT%2010.0%3B%20Win64%3B%20x64)%20AppleWebKit%2F537.36%20(KHTML%2C%20like%20Gecko)%20draw.io%2F12.6.5%20Chrome%2F80.0.3987.86%20Electron%2F8.0.0%20Safari%2F537.36%22%20etag%3D%22liswx8aIsWLA3o-mdAqL%22%20version%3D%2212.6.5%22%20type%3D%22device%22%3E%3Cdiagram%20id%3D%22ExFkcT_Qc6aa_9KxlopS%22%20name%3D%22Page-1%22%3E7VlNc5swEP01PqZjIP46%2BiNOM01m0ubQppeODAsoFVpGyDbk11dgYcDErps4CY57SbRPK4He29VKuGWNg%2FhSkNC%2FQQdYy2w7ccuatEzTaPcH6l%2BKJCtk0NWAJ6ijnQrgjj5CPlKjc%2BpAVHGUiEzSsArayDnYsoIRIXBZdXORVZ8aEg9qwJ1NWB39Th3pr9C%2B2Svwz0A9P3%2Byka8vILmzXknkEweXJci6aFljgShXrSAeA0vJy3lZjZtu6V2%2FmAAu9xlw05ldX329nRpB%2BCWaX0WTQV%2BedfS7ySRfMDhq%2FdpEIX30kBN2UaAjgXPuQDprW1mFzzViqEBDgQ8gZaLFJHOJCvJlwHQvxFT%2BKLXv06k%2BdbQ1ifXMmZHkBpci%2BVE2SqNSsxiWWfm4OkuauAjnwoYd1OTRRoQHcoefufJLeSs9QGtwCRiAeh%2FlIIARSRfVuCI6PL21X6GgamgR%2F0FQPe%2BCsLl%2B0oRIopCrIFRS1eSuirn0qYS7kGTELFVGV4VzKWNjZCiysZZDoO%2FaCo%2BkwN9Q6unafZi5u%2BhfgJAQ7yRM957r%2FEmq5rLIRiNPMb%2BUibnfwRnuHkvKPD%2F0zT1D32pU6Ju10B8jV3EWUeRZ2WilcdJlakWjmVAtL225xJYooqxvqv7yeQCC2gdNFDCcDvSeSpRBt2eR7mESxWw3LFN6Hz9TrD0z5bxRmWJtKxI2A8Ip9%2Bppoh0E2OhkDkdXR6xuw9LjfJsKRMV7EtHoCEnumA0j2fh%2Fwt3KzWDPzctoVp0ffPQj7jpD3i9rjqZ0v8O9cN%2BLodGsom%2FUr4andT7ePAA0IM2O5i75Dmm279E6j%2Bu3T7PHe2%2FxazxdkN5DspjNgntkg%2FwDTBMk7b2mpht0H05kanozJ7bFz6V70x59c70lnp29VNJs6FAIkpQcQqRcRqWZb1OgtGFs1uXOxhfWv%2FjnF%2FIinFZvUATXeikv2ENO8jK3%2Ba2jAXv5KVznGkBzv0bzMAwFEttX6LDGsVqorBJZJYwjhw12NUQY9bgyGbjpDClp1CZsqOGAOk62eT%2BlW1VZF7nMC8D2zfLZ14V9NTFfTZP6jaykyegUNTHabyjK9or51HYE8ZHekmubUf%2F1NiNlFr%2FMrkp18fu2dfEH%3C%2Fdiagram%3E%3C%2Fmxfile%3E^ IDATx^tT? 4!D{!BBB!J$wM:(M RAzU@;컻n.?~swW!ppo@r@@@@@@@@Yx@@@@@@@@4AB& w@@@@@@@@@ dib0HYx@@@@@@@@4AB& w@@@@@@@@@ dib0HYx@@@@@@@@4AB& w@@@@@@@@@,Ο?Oݺu~A7)///YjbZݻwGyf1KOԶm[~/ 8 RϞ=_MNNN4t3!d?4f5jv̙3)w'&       }f'O+͙3'lg̘A > w޴m6=Z4hd0$Juqԩ8qj֬)bbb}ɓ)gΜ+[bGQ֭@,VZ!!!)SW6G4        i!`VbaYqz%b77|CժUKѧRiҤ ͚5GJѢE}NŊӫgꫯ߅Ս~7b>}k"Cm,Ο?~m . 0Xŋ"cx"URիGnnn)%SZnEgdOa}*f"" t\!X8b߸qg_DDٍXd-[P5,ze5RiU<-.VZ7dkQx@@@@@@@^B)s18 lb7Ef(Uop1XLYx.o86uPXZ,H999-8n իjr͛b ,qBֳg_̍+&eĈBkٲ%q,!?LL g. 㜱06l0˥R2.>WIb[Oa,Ҫɘ`bLߵkv΅co{67N;,*RȲ?v  `ֶL%xU YxOtn\)b,%Y3 *-,R \rbᅨܞ ϫAԴiS*_VjY Ydq_ˌf-TBr)S,.ӎ=*lIB~ΐ nN2wPTS1,,xP5.vl藺(Jm *$S FZT8NgVR`զMȡCUV^ѢE]F<~V"gx]VJu:4(U+:pmV\+XZb[|       "BZr%uAo*HsevCGXQC͛7,dKiӦeS Y<ʕ+ܹsS̙c=%$$PuY"d,.˟R|Rv'Ov׮]7=}BCC)***Xx|;wN|$+YbkB3VXKLLx YJˤnhhf?(q Ν;nv6mu> =ףG!^qaWw}bccBbbk͐E,'ݻ-#s<)eT:/^ ~;v"[*U̳d8.Ìx<\~Y[?\7[ , ʛ7-6@@@@@@@@4JBFFBVF[q@@@@@@@4JBFFBVF[q@@@@@@@4J Ya8(Y0>Yx#@@@@@@@@4AB& w@@@@@@@@@pM0I .XA k+j #͑Mءb  gݛ:nDB#VA:pZ H dYY @gj`j# Yk9Ҹ0 /-AXFBVk@F`oY-g?{O}6ؓ ds7$:Cm'Y"j= >?uPIB=odԁj= @RG9uPI?u `OI} $,uPIB:ȩ `O8O?{eOlDu !d `OEN?{O}6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'YAFpQB:~ $!K}\Cm'j= @Ȳ'} 6":Cm'Y"j= >?uPIB=odԁj= @RG9uPI?u `OI} $,uPIB:ȩ `O8O?{eOlDu !d `OEN?{O}6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'YAFpQB:~ $!K}\Cm'j= @Ȳ'} 6":Zz ۴'`+S>VZ7kC+V7|S VCG^7+W/ӷ;R){79 Yhk"ӁԵum .J~F[[9rSڼn%9,M+VYjZb M~4lx^KK/W+wү;O j}S7{mA@AF݋U!4s [<.KoY? fmDl3Hc-lKY/Ҥ詔;Wni BvmB:ZY*d1~qPܹtďJUu|#[ƒ"S6^r|{G&SyX4 FM4]ܤղik:|'A~ʒ93! G"@O_O'b]}iWeJyG,)]^|kEV8_y-ݻwW=$s&7CfaIy?Dcp(Кؿ6,uk\Z,diތ *,|f&؈ϕ}No.=~;9;kc(nL 8~L3}ڞ%+xpn vUxIb-x>,{woST/^@)Syܣzh Q_D0[8'QW-ٸ-KC/8J'Lx Ar◣q,DoN,)WeTߐ kRW yzu=vmDUׄe˗S dE@ @RFZ>}J149-,kʉS3q@jۢQpȏ4:.]HQ1Ժy[хNvTJacDq84v|{0[~=!Pqzݴm @A~uQr(KukĎϟYI!EN::ϟ?73Iį(jӢ.oO҈`oz*MOM mLzG 69 B[V$ *<]iH!1}O)t`jkjʵY8RYVCYLjiBxOP$eɒ\uտx*TRF젉ɋyκLZ۞zy-!vlӱmxa HA3Pe Y,;f%=:šSbi8r ʔ0ؚ*p`jߥ}3 gq ^Iz\mש)p a8w5hJ1'd3n>4[fʹ?{LNZ@7{>r3QBlV.꾁j?uGm6Y^?dԽZ=iÖu4cjTI'Ba3 Lofk-XLZ3zQ YɓfRӆ86s9+.\K0 ۷D$tG[qm[sohN.Wt]rp覎F%T9ePu1!EAXʛ}߸y<]3z4lc)+ZGӔ496Z5kcv=M Y֎l,u \Z,)H9iEH[KT_~ĖAXp=S+)7ytLD[7#O4WqiA{y 1~&EО][)<:*VW˗4q\qF['$-9ՓACg愬FR: ._wF@IԩC.ñ?yD._3~wl7&djGYZ,e1+s--WZ4V!FC󵵾i~0PAB x BBВeN4 sL Yܞ%̙XYR@dEYT8 2c샃֏Ŭd=e ~G֎ ,u\ZilpiF2ey w \xTȲW%dq``o̒' q9j\yD.u-tT!K]ZhېW @,u%!g*-x,ZT1YԹ]Ws6x!K-L_Lj2U/5!Kf'Y^Շz ?yǨTl8[V؜E/ҥi"<e-k,i.~"~# Yg!K i"!c>%sjF}V>+\%*Rԉ )&\|XȲW%dqr "a֭Z,gΜj֪OtيTԩ/P%DӤPM Y2^8cOQMBA d|D]n.[ʒNlNe>.s4#kr$Mg2F1"CXWZ}[;>K:3UȲ\Z,N_G?YvBSe)L,O~,Դg{Panu1F1Ժ}w;SO›Dc+#SXW\$=k4.a6}Emľ7ZdtkuIWo7CCC,G͌ Ce#hI Y.\}Rhh(խ[%КřfN{uCÃt.qΜ;M>t)@7d-^. B̪EϞ"[BL")nϞ=ȸ0*,vÏ)h?UXU=O<Ⱥt"EC3eѣ)NKMcǏҼE@"K\VEYqUȲQ,"C|h™p08ct3Xppe;K\ մg+%E(cY 9nդ0J4&\&. ~Aڷ=(Yb&T"O?*4mف̉7_=;F2Pf[1 V@CHg2eI>R]xO,i~kj۹^ aK[ {ZvEYn,mۑ{]|AFxXȚ;wBBB&dܽM~MԠN#MTec?"0:g  jA]E;!ԶE{ʚ5sȏ4:CZ!3c?zn ֎' YһǓt4cJ,mߺ^ƱSx)I ]m$е/e/iVD95N"vm:҅3qCٲ+K_˔Hd\MA*( 6n*U޸ ;og2+ A3nnD\Uo_fkIE|-.2yV%'WrGwtÚFǝ+w^1OBj¤%=?q.ጪ)!Kw<0b<ժ&=]OvA9eZ2ʗeNv"JE_]*).q}S)m۰^F^'_[f(SLU!˖WGuҁCŅu KN%MX8╋軝D]kB];tO>.WO:a֏>.Y&|75嫗hޢmBDcህ/v/tg܀9!v VԲiЦgJv iÖutA1j|EZwz.emc wM8 Z֌VB-?_ Y|qc>&exPM[u=}J.҄T/l=+B̄VK,iwKz|Z|Nb QBw6Z(9eڊgӡ{e =ZwN Sھe=-$tI KΑTr9a6Hn[R2u2>"E0yLY$f|9ծׄwP d{4?^"s̜߄(GEƼf3Edf/!˚5KOu dunÞ9=&_}^!kMMg]U/ YV JZZ^;~@Q dt!K.rnb\I@^Vd9RO*?$g'M (]Ϳe/Yj"B2!d1Re~CaT/2A>ׯ^B;mV\~w=bG+/;Y=i(HfL-W;S>Tq b,lwYf1e-+bL2)#ElB㘈fNݍMi )!ݿ-E._>B^V}нnZўŋ%/paAgO %heiY 7=Nc)w*R0O $p\.w(G _,sBS)klfBd& )w(e͖ *aT-Q VZla\5Dz_B%_Y;z, @tֹ6/k֝(Ɲ/Aߤ%-_8$_Gz<8fmB BH,u_zȽ =i@vL^uɕngU(&a}dr8aKɜ46W Bp5­.^ |>tI|o=~JdzZPinϒ/aZ'ۓBְA؂|gker ։;;&rû1!NGŊ#7Mh1%.zr'ح_2\RyB҅!ˌu5S<eEway4:&u{تjќ$ S̔@Ss-d4p`jߥdp8@3ݸ~f/$'sŜY 򕪉/gq-ɑmˌL[gz 0_@ev,ƏEKKu߱U|Y eQfX<`\Vmg lƾl):]7ݟ /qsVY-YE `;l.% AXY3S&Ac֍?i֞ۨ_Դe3~{Z}~,HI"&čYQqmԷ@oake!y8K"#ߓ:k…E07ڿg;\ YR/̲3ge[={={x\c'ΠϿ+~zq -84g$ ,uo.rZ~6,9Vj -6V BK`e[ܻ v U|dVX^9ҾeK%-ԪX|.6r$XM1tEE:%,3fm{Vըy[]2 ه~-E)6`5NH[K1Q^S1$cJZ)W7Gy'.K7):~a7La!ly4?K WPeᱳ!NAԢmi,+,sk(_۷nX$dk?7Ű sB?d[ci#>/n9ufʱ(gSνD\,A])wzz=+in)7_d=Y,.>SN!: `__6YsDMY֓O/-,s 5 RkM'ﵦ;Iee5K˹tef3gg<ڽc q9,{M*B"{)F1gsZZe[g}!Bzi!~֖Xd4#v9pÓ->VlE  oL3#̇չtK !( 7-| %E6]4\ m3)b{BO?gN vMJ+.&oiBK۳ai{ A̍-=zHwnݠsא/Ҋ)!C׊sHZ^ҽ3 4ԉUYN."4Ua ߸%)_I/)| Pqx*bO-m:9Yf,itk/,Z:V-'+KE? YE bތY?,Oׯ]X1~Bg-VZi$@r !v>$ !,u,={_Z,*mzF2t"|91q8~ؗdM{}i!ؕrxgOJ<668֗/H:t/j]Xԃ\}c~:OSECEFVZ+̘!ˌ%UOaUNNgYb*Q*VX_>dk!V-3gq e+RQ@!-|c"B1Ad.%-NzL' BȂg!+dyp<[cArw#K @!Kfbz )  fXRS wy F=k3[۞5B=E$"+!'~"EYֶg{em{iq8{cv]SĨZt>XRl":iJA~ž[k/Iڜl,dqvOV0b8sY&,bg͒f.򕪉_ S ǴrYƲGt8էZd]x^XN}U;.7۷nR%Tt4e-4ercXX$N Yj?YiP}iܹzˑ_z,xM'f:Rxl"Q2lRӞ*dc1=,Kbd]@ً=}J;ۘBtOGeW&1)2G1m(d#-fʜB*{e d7_r|ee/]LZ63u}GǏ'*Ⱥy^JLRݽs%S̈́b[&M Bϗ!#gcG3E c(ž| R@X ծTdT`řMF6bk!_{^G ck\Z0$dݮm4k ڳ76 L݅.Ϟ=[Qb%BWg{џq%h'?1IS)w.O+# YzS^nL}N5k7V4eJ{goV~-1^VF|Y2"C3ƷϘ8! 5qe343i`8֒~iߥq|f.\w41*؃ȔLޣtĉ4q\=cc5Kuhw)"6:t+D!."8+y"͜'!4k]8{?./d͚M7e.!Ԫ}7];;&"I@7PGar\0ZϞ- &,+,s A\FRAz g؇_$Kޱ[??hǶ &|Ad7qsdJ|x9 +\7!ן!(9ax~ ۷/͉DŽع@ m,~_K_&r9_|I 44*;xZ%ʘ2AFZ#XsYޟ(TRvA 4Y8ԉߎ PPv*X6n[OOB -Q}qY=HYRUխ/MKm/kԢQucr}s+-^P]*'Aޟ2 ly^6~=RBEb)r}{te++i-49bŝJPvݨW>"^a{.-YmZz=ʗh2Fgϟm}5ݐii' YiaڳZY*dܕY $ʒ% Sft^ ="W25VػSZf3FҢ`铔?N<ʗAn: }a0s غAg Ka.oٰJ/k+.[!i}1``k@A܍آʝW2&=&dybliҮsoV݃8őCfzwU:Z_7>KIH4sz]=y>^3p n)dmnzݽWʔRi(ʙ3'MDQLjIY)sbJ({[wnQ b"&Phlh!3eJL[ Qv xag̛F[LS(S'WXqʙ#'-]Xpϝ+n {9tG/Gޜ'<73ztmz)Q!khTD=x@W_k}= ,uzKń>HC%>IeqOYRa߶i-͛/bpX2L)Z\dfsxqc8.{uL7RjӱqӋϩg_`,qX!>B01]dtGѿaBcuzB -l*/_Лo)ˁ۷@ÃtG,!+ѣ/[_la%KYVhU*V1T\%qSc)S#d1Z0?ʛ}HQ S'M`Prv FZuԭ c&CGQTf Yǎ%gρTV}]v5WQhb~S !K<"E.B֭D\sgOQ5TbU!: Oc]޺lU8IO݆)f\Hk![ ,~8+ {& *A>td@>_b\9KqArY[Wf.¥BG,p"frP%TtYɎCE6-(4._HN4i%"NaDanB3SmzqS\Y `q E*eY8Ȩ[G?s4Zn;{QӆSLtB vpai 05>̯otBukգ\cLĉ<y0O;(fztY,pK^OvlӡM'9BOkfWۢbJ!+y\e{NŊ ~<'.j,|bHtp<8Ǐ88=sbKPzMD=q!6rY]Fqh]SYd~rXBX_bŖAq57>cakjޕ#ǓV!Kffc"Yl%tqV,X+ᬙ:q !KO^^mKڅs;,Xbe#v9q%"R ﮈŮiq< 9p4LEX+-ؽG8a2X d`JJ9^vںNRg+;a}rm}^m1n֨ E',6M>yW5kAeaцó˘,c%"gTd˱kc?-`hH*T² 5oҒ8%U2aԜ!dY6^*dq<$=;zEj|Zt, ?x8:bdy38,C9r.ٵИ%KbB*,_iiָE;[]L I*3$w=wE03,YIpX-# Yn,`URWʖ.'.X6m KRZd-jլcHZ,q y۵a:n61ׯmQ{wρFcqz+*dIF]6m9_s}l[Yxj"V!KLYMOeAj֪Oe+V%?b%J }RX/ax?[j?9su*]"u*A i 4):&B/~!P UѧFr4U'YlOk|Zz;UQ+EyiXdB4][c `#l̀= @RGQ,j46dn.˝dxS1Eop7'Xd44Zx=w/1|Gy?K~^ς(2,_''O Q)O<Dk,ޤqStA0Lk}0DߖBtc5#߳ePu+-Z?!KP^"dߤ0J4VPr%ڧ5lښc}Biu5٩Ğ7bm"djR8 pטg6SN=i{?tY7k9.FCSsgNÂ6w-dΉ#E`}S.ְSM{5qSUY6:b2^G³3&&<~#R,xyƲLuޣ(k֬R>}J &{(?*Wx &I YRD+TRdTmD"{U Y|9㬅ѡ:؃\}Rd-d<T=7k!gLS sƲwo]4=avuTL[mYieV/r Yak,_5KVJW&0Hw8S.k21a&>k6={*.cDlu1 ŏèRwXa&E&cY ˨Ag-TZ;|mO}d(9ȬiZhk!ZB\+"kC뵵i; !ؙ,u Bvb1S.:4}n2M(&8z4}Nu؃|=Phd<}R}i "k|dעe)zRm.E6FFBRR9sB 2o~= ~C5)f–Cu}GǏ!n޸FS񒟈z 4&.Yj6Β7!*X-s;,qv.ԬuG<{$EAG݄u֛8X8jӱ6_Ia#Ĝ#)sMwq> p 1E.b,>,-5 g;]m07bW%dq@Zt>@oz+GN]cC|zARhV?sA dق";nU ( l=y; ӣ'C-+Qӆ(7e&`<&oFd,S|<.>@= cN>۵CwcF BIt/"K 3Xז% [-_TJjNηIY`a FM4K5{pm[bH,~ ,hoRTʝ+DĹfys@.̴ZjTLJ9 Y?'P!P1+b '-1u̒"DܲreʋW_m֮Yƒ"E,EN)d#H3]00;mZ+D2+/;#DџA;m E\}slB##S)4b#FVLy~@GDעq!憮D]?WV5K)MuՏ {xh!–ZAB̑u8Y 7Cuj*\ gvÆR)sf|9=gV}/U殅r<AB+T(-^R9 g|ovA-V?uFmH du,28dԽ*d~G̠/տ[Y,` OJ!pfm%+ӞUzԫ[_VSa]# ?]KW-հAzzW^+U,`VP];tO>.'8NO95Wӡ#P"Y oZb{Kpʞ=/ȒۼVhܩmҾ;qy*-$"uEvڱ5HXbɢfªzڸmx7Xjת#ulY'Y <!KT^ Y,TRۈD 5*xl,ZxuXYu_j_`C}xڸp4M;vm,2ebO޾e=-$xrצcOcM L>)J*Z0;vl י>~/vXZ|NjҲ=u= J[r-[4vl[/ fUI+Խ?.[(_ì,u;5oK|PΧ\EB%G5eaE:5[6YMi<؈,D3 `Ow!K P2.Y9uP[ [gUj= @Ȳ'} 6":Cm'Y"j;&,3'SV;@>A%XsFGj= @Ȳ'} 6":Cm'Y"j;&_DCzPDl2}\p}d7'S~E9 FRy:~ $!˞7؈2@BRA d:~82>wuMw٘V :~ $!˞7؈2@BRA d:~8Yʏ?|O+Ce0v74qgrd8[-6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'YAFpQB:~ $!K}\Cm'j= @Ȳ'} 6":Cm'Y"j= >?uPIB=odԁj= @RG9uPI?u `OI} $,uPIB:ȩ `O8O?{eOlDu !d `OEN?{O}6ؓ,{G `#8Ȩ !K?{>.r6ؓcSA dٓ>AFHY6ؓ,uqSApSG:~ $!˞7؈2@BRA d:~ $:Cm'B=AH𪫮 Z[ Yt6 Cch IDATyIȋ؎ֱg7G"8Ҁ0A&̸,븡8Y֭.rqC-p$8YZ H d9Ҋ`,        F @‹        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,         !K˄A@;        4L$,      c,:8hRB "$b"H$+^Jqw)k̾fnv7sy%c !! 5         d94        @@@@@@@@,&t@@@@@@@@BCӄN@pbIYX        ABCL:         ! k@@@@@@@@!@riB'A@@@@@@@ da 8Y1M$IB֥K{ta #FPDDeΜ@ԳgOڶm~ZuKuڕN}O?U.*L,YBzj/o%Tti9s&թS       6%`Tz9tt4p@ 6n/BmӦ ͚5͛Vqc\       @uiܹ39sj֬)ۢGӦMٳ;P!dUT/_Nlu|X\t)yyyӧOů,ZB-9FDrvvX{Xʟ??}TZ$`BLӦMiܹtA)B:t www*ZVSZBCCEsVa+7hƍ@QvѢE_|6lH!!!TbEׯ_S\ݻʕ+TJ_>%/hƍiW{r%Xb-Z,Y$a… x?u֥N:rs֔'d*UJ͛BTdA81 Y\i!?/@@@@@@@lE Y!ɓ'4h ZjE)___?;C5Svm!2`R Yl#ѶÖbqqq/_>ͯBV.]Dz)S \ʕ+G"jtfajҤIBɘ1,Fg̘ap8>׭,a=,JX't,)mڴI|@ Bӝ}#dq۷o'@@@@@@@lE Y!ȑ#Ԯ];up+dw3Zf uQQFʈYaiah (,b [Yu?#(vL2[nXPc4X=z[_1%?׵tblСvdc 3>CJ z/P1\¢Mf-4d!jl Qhx@@@@@@@Dɓe?luŖF3gN=z=,а[]rB `,4oޜ^zEW2pղeKї%J۷TtthNieG]۷o/D?[C }%/ȏR믿v'V1;wr?JO x2eQNWraaaB@d7B.}c0bJ)d- *]["q:~޼yCcǎ &+E.9]!-MQ elĂ{'d,-[kMI)dq,-4uU7Kx auk.QN9vCu"?Rz%c5jZjEˏ-L g-YE@w<        `K,z-~nJ&}R1~޽"9?{8R@lGi8vcaԩS)[l,H%cM%Sƍ@W 'w8V?LG} Çw۞t,`f5L%/--H Y,HĢOrBXLJʐRE)X5U2T?6N8QĐb#,SӭGfnz왰Fck,~ M3C .        %W[{2E.ӳg.^W -ޙb8`Μ950"K)pºSbܞk֬/_^=XkL2{Csfeu]?B) vAM4IQ[Bnl(]Eďʚ5k!׻n:/4э18?,daݘU)u!Ef&dB I;E03ft9 n<:PՅhL2{B^#GUAJ/X@!KWqwwHʒ%Ku-"MrY2e$ug@',qUqB+,-ս{g-TBrӧ ,~tcOrq|cB߫^~-8RaQxQ%$K u%!HC76XZbVJyI"$׋r )#ckQe"@PZIjE*W۶m+ǎ֭[ xEBĿy&q?](]9  A󹎒%KeɭԃwA@@@@@@@\I5kPǎE}+Sf7dk14dڶmKQti9s&թSGSR>T\,Xd"dqE,.˟R|R6+W.nݺѻ+~ $}tE񻄄!^ɇ-(D:} c";cWB]l(       jh YpkKLLull֭\#ճgO!^îj~!EGG " ŮEn6CgSL}[.WWLAB7|ڽ{٪TB`E'~qf2lT6m^[۷ A/_8 !?5jX>S c~ohQ@zFfbNk        Rz}+p]B֒%K(wܖpPtmHo dxA@@@@@@@A @rЉCA@@@@@@@ f%`5!Ay        vJBN         MBVCӄN@pNC0H nD=1fB{"ϼوƼ( `7Ew8RGpsB_A@?YX "g$ c7{"ϼـe7{"!˼,uPlIB:8 `KlIm"$24ؒ?u!d `KO?[eKh,D9u qQA!K?[>4ؒ,[G `!ȩ:~( $O}Y4ؒ,uqSA dْ> ENHdCi%CRA d:~( $!˖6X.r@ J- `SGB:~( $!K}Ci%YABpSuPlI:Ci%YJ- @Ȳ%} ":8Ȩ `KчJ- @RG?uPlIB-m\ԁAF?[>,uPlIB:8 `KlIm"$24ؒ?u!d `KO?[eKh,D9u qQA!K?[>4ؒ,[G `!ȩ:~( $O}Y4ؒ,uqSA dْ> ENHG?tukS P_PAV^)J|m\CN%JSU-Vڊ6^Jo0 T)r-ڊr̝*:z#͠# Y/ݸy6n@}G0ʕ)O_TAmһᆱR:+ԱWr%wv77w{vP=(k֬v׿,un( MBcz.rd YJ:UIcOrtblńв {붗B̄OcG?yL segT@!r*R}~:]tAҡ;CsA6uسu%rv*QPYswi{ ~d@z&!+=>ƞf"n* #?Ѵk&a1u/YF8">)FCRMK+B(pڴ[jX1ye*h,5e=tG^c){Bi`k(UTB"Kݒtѣ486Y==ȩ[~1&dI:ϟ=0Z:?BCc(kT  [?uMz5MMfNʞ}zkQdh uC,9YS!Kݒtѣ486Y==@Ȳp􃌩BןshǔhV\+b8ڴn9۵.]M:ԬuGjվ&ncHw:{ DO_Hm^SRxUZbm߼~quRTlzwDUɹr\]7ʥsؑj_Ԥ=R&-)s,Ie'=r`ݸ~(׶S/jԬV1>U1\:,[(E\.}}Ze RMkE݀zuo`V,ܺ^~%7ă>ΛO_j5,"h:xB:&d; 1X zZL)U6Y+.M[7Pm)S&7oݠWҖ_{bPRe5 P (~#h}h\yԿ *T?RtD"˗/))@>kxb<e/̙3kNεϿo5VGEUSN=u]ldՋ7'y?ygӖEl3nH}MK_ף3(g)ퟺOKCR7~S7z&!˱A9u 2)X (4r4'aE\Kz?ıg,քFSѵ+)f:sg= 若>Sf},cH Z,*xXlc1ː=u+2eW2qۙnߺNo?Rm4f`a`-5l$!_rA.^B0S嫗Zv8ӠĢ1*ڴOs$!ŋL OAB29sWrr&5'ڵ DL;#ׯPDpiN4!3*RȘ"8 ,M&,BlŖ_OxB>cGЅ)~,*UVwLC=Ro(u:}=&P g-\k|8z꥖5LLTк(+z7û4ez Y4Ə-kbYE7nݠYq$ gϞR`a1<]5Y(c2cd6A "=f}~!9wBܜr-4{*!Kl8OQ,Ǟ?\-G?ȤTȒ"1X86YJ.cIIJ72H>,p[L>_ϜJ;aO4w[qqO{p.x k1~ń;(42*VU͛74ebqF[l'',B:iFCYd5nю'XpqeRdM'E'P\.tpфnH Iz 7iq)7 ,M-"\cK f,HK!)d̘A.Eui,LrҊ$czU,_IXX>"NB;\X(Etyj3)dTO0e8Jq@ Xec+dSfĈ%g '>ǎ f|6F'iE)hB3X{{YfF  d9 !k2آ'tmot`wt`"6)YSMxݽmP-+d)-8^2{;Nz)ݼvΟ~ؽC]e9g)+ ,qd ~8H_YckڌZwYC G؅/ _k%y>sd;I >qXR gW/eJaHZu=zPđ8WϫׯRf$WRr%}mvq1e~]5jNWNKWȒV]l]c6O˽R Y*w5B9g!K :OQ,Ǟ? dY` 8A&Bּ)4![y 2k Y=+W q W4}ḼZBG挌,On q2?cLTvWV"N)_43.F;%qyo{>jV:#Tl8[fؘE7+e&@e-s,i.^"~= YgBrѣ486Y==k!ǔ#Gի2)d6;&3=ʷov AwK`x"uo>}P!ncz1IQa~ԦC7SO›Dc+aQ_7_!aέ41n}UX^\ =O>}[]1- 1TR Y27ƕ|.[EV"s+׮% ?SYYҒ=;E~>! us.7b-ime K5ߝʔ*q#kZTh0F1"I_,k}מIB?G?D@ dY0T ^-._L`W٤ cłPx7\2 I#g,nf P_ppe;S\ gh/k!ǭB S'д9+EE}O?Ju%JSԴšJHóO >Zu$clܺ!DCwۍe,kY 6l^R Y.at w?2@;)/Nc<܅s4%"4hF)ZXJuM@5/Uy~qpO5~%nj.!ԭc$Q8?\{u42fHNdSNK5S"KXVEUȲ?A ;!˔8)}I54Q jץV uS˛ ovZaIYL۞K- Y<,d-X@YAAAf Z~1&8͞Mvlq,-VR,eim@ o ߻s&N93bĻyb_S[֍kkxNj]v)D8_m EHa1;O^B~xtauYmEh *]L_(~'ED۰ׯ^{H˻"Nf"KMLgĈX_m7OY,9ń?G԰ncETdcSx8{5^΢1>AԮeʜ9\;#'!GQĤ޻#,.^:/,:B~#(SLZ"gUݏZ6m_ (9,kG Yˢ٬ Bϙ|=E[gϞR`Z~MsܻWpo4k k Yvl M_>qvzpZ申O]˳jAVԴU `]|;. >$ٱd?hÞ널pý7"Sy`&)K-(d&_K͓IB+AiV,SS/jҸ*Yue(7?%vm;ӊ%sIvEYwDT\EcȄ'Jp> r2"VTZ߻CF{ЖoWeWؕް^$Le̳)1n8-s<ׯ`?79sqkW,L"L?GtwȩH1bD?y,,D.ܴ2cw[(tb;g+=xNEc˩}K&~ YJ7V\>L{nYOcdߞ>FO=XR&m̵/'hT9P<'tu)4 \G?9H= i>Ӟ>{8$ncSkdi!ᣇ5t\µ+@iNm?KiҳŘ\S|6jkbC >gxE^JpZxBmf1] YR)7YJNGj,̝*).qyC)ͭ[\zRR3V J)Һx  2qS/ttz x7ŌCB|ŪZ)/Xq- da#%ޅ,L2Alirڳs|7s&vS0>S?)OZs| hUԣ3oRP>wYf̏ҳ5GEL|.bDE\DŽ]8gnlH[w bҹeJ?jn@{_K2^S{xdbK 5'^ y Xb%eiY Z8+μ DZd}kx*\p Kkp\.oFt1!˩X)ʜ% _8}L/' N# Fd iƥ$Z~p,CwV}3O|a1Vvd,4*(L"Dta7]#E.~amѦqOD+@ UKkoP/F X_.ﭧOBZqK=5!JId}R>2@[bc3mjب|O~5qcVIw8ħEK9sLhc =A9FTƫemu?X.IQx<^yw !niB̂eDȺ{ s9J\^|͢S<O,Z:?^5}k!cF] Jh cx;[iBx28RTR5f.}D;ٲOB4Xl K3] 0@ev,,&MK+HqN?!>,BDi2j)3q,r0e~Vmc4lƾl)q1,CVZ8ȤC{M% n h?u,G^{z'!K)=;fԬUG?i&}G}*y,HI"ŌQ?qͭy /ake!y8K"#ߓ8gD!~nthnv .ak 84YYlrK/_-ׄ)˯id˾ɛO+NE';3ϜJD%9Ξ1#(~XoY!0Xf3xP%WXVZxO {wPhd|ŋc#L a4c侳;JD 3MjJS}K'R N=jG&oݠ+DZ{/>o.,xiT !h)B| Ѭ{2@ZڶqIAu /3_張Tr~ Jm H!%F_ξ[O+翔 Y2LM4gᣩ|j".*\I+!6s3b̭O Y[$m(Rb Xo.cIIlL uJc(ػa,VI74cj(IϦFӏ$F>͖WlL;kpU>w6;؝ZjPz! !Wݽme>Uu}&'׎MQ~peˮ1TE;u-bF9srTceaď t dA2峤NݺuB`cYD-YJG `SǟVQ4&3LHNKg3TJlF"&%חPׯ^=E e(ia1cFq?}0OgהGWHd/"5"bNK,che!O_|w1av7֦Xd.})ݼvΟ O7fL2U\!볲)hB>x}na>vƥX:{YDq#9dĵ0ȥrDe @ZdЁ; :ZnK ?kY>F.?'{ 7)Y M31z ݿ{.ĖV8{q^6/;POXU"bLq5jI 2+i%3ğ#ks,7JXsbm:~B!KZ=|p_Ǔ.kW,+'EtS,pv,Y>߄-nݼfbBg-VZi$@r !v>$2cAV dj]POB/-R*dP1:2B,[A d{eƾ sKaN}YMĮ fŊ8RT⾱DZvIB{?VSAMmE=Շ:q4_d>TdhTj lűߑRYF,55 rr:˲PQ% %S] yn\Lf̘IǁKHESif\W S@dB:)! BVrۣ1k}@ dJneU:A u@R7!Kfbz - b fFhSqzx.y N=s31>s,9ϿE$n'" BiB@tт deZÛ `oһ3ԗKY4kZi޺F(ù&oJ,5|/B17M:ƃX2%FhIG/h[/~_*S2y$sӺBOQbsaM@r|IBȩemJ(g޹C~׎Mb9sD>MeN9׮GQ]4ocBR[d|ƙsYrW -x?jj=Qޗ޼y&̓հ0,,si dx5}=TFd6jA"1RY9e^>R=Uڴt#;vmQ?e)Uէ d9ԦrY =|? !(ؼ(0[Ar_w[v/|>MN\JEJ7߇7ft93sR!K̫`O-N;,ݢmgZd5iގB(K*##g欤&-'-iؑBO`Hϛ7;5 :?\ >#"j\!F`)6yQ8߸ڱy-(>_n@=:S&-*23êoѾ]E D{X)2&d{:XELpp0իW&O B>kMr,Xz@瑚oy̅cҙL\Z3փOgBֺkcp A}XrԔRE:7I\M6IҫEVz?D;*Mh'~7?f;Y-KbJd0%Fh0lr\8U㔙ͩ/9TΎwr׬uG3]\z k KЧN& Y r/ߍY0\d.޿KHMsIԆbdݼqUX߱Q p@}Ib9V,G({7.b13"cʑ#IHBMDNPFmU d' cTϐk= YS'Nv:]Ⱥs6Z5jPT@""^~A5H@ Y-un :Kq%^/rօ,'eVL5Y8;cGot-M9? %ݘlez>{nVC)Q[{YJ(^`7ntڽc“M3px Oٖj/9]t I.)}ѵeka/Y+ucatͲbMk.} ~˦>q+<ݻO;lXK}άU?udm)dIK,&^;~Oazڳq,(~jTZ4iEmZϿo5VGEUSN=aƔ9sfQǕ:i. j7oݠWҖ_`BT:"gjΪRhhڶs }]6 8j׬K=;h٪%oRil<Ǐ43vQHeFd-jި5 ,դ ~/-Ek]M7n^<\R/k?C;wm(TNt5Ki |يcd@.ޝbpiDzLH'ѼU2VRzQOd~9K7i?NiYiwnSedRFp3( c,O?JubKVy?) ^Q>s(e[7Ygepb"3id-/+qd?.c(ųBVN=E8(E__|-U n͜:3@cfʘG/g!q'McCHrm8Cxjmcm\tJ{wm%SYK!k 7{̕3c=hlT-s"'_xB-ġKRVZ|z$ʕ)Os h={v5?"&eϟϚ4hF#)ۘ!Q1i۷B)Y9鳧hԧ{?+ޛ89~)d,@q $1 //<B3X E9>Aݽ |RP6J+Ib ц)SfuRxP$x?f/I;F)_O-"Wh1ʞ-;XLpϙ#<؉3߫J,y<}"EDW_s3ę=u5kV͘~:## oNE?]< ~WvzO<[sۓciB:zS7jN ,(t;9(MtL~6 dٴX d&!dG=ȤTȺ{x%.ZO+Vp2.*wiVU!TkԂ/Ј$ɹИCgeA): *tud͠>Η{!cL6VfAC}W¥a-} 4:4XJ7̬e˩d=ϡ=}Gl2GR+]rIdaK1~8 ~0*>\D\ev))8 x빳i"Hk԰ikѾ2Mn{J!~nBӵSiJ1%{Q WB©x0ΟOӢu:v:A`;$ʝ+;0ǒ 9+z9 jOkAXr>qď}!GQDJ+/ 7wZw$#&L Q)%/ʔ),UXBƌ* _UO\s{7_ʝ+}Z3&b(_qMR;s  ;i情ӱ(alSI'zSO QGXW&Mki8?b~'Rޏ( !K<"rQtjM_&N'bU YnkeSypԃLJ,}I 3S,ʄF'I$!/ei Y,M {wPhdH|޼y#R2sP}ͤ"D'eW^ &ja4|d&.geE%LgNn!epQ%+9ef[c\2G^^,;,Ik7&wԬQ$X$/7_?,> LOS"'-N|B#^4l~ Y@};_Or82d=BUm7>#ĥI\h<v?ؗy Iftyzx_L)d%Nϩh1ď!x*\Sx._DSNO1)lԸ~SQ=nbya]5VW?uk* O|Z(s6ˈhwR~p4K$,d_pQtjsޜ>i+QS/i Yiuf1tEBvԃ%,c`um,ceѭCi;RhcA+&~p}\:O##-۰Wo`O)q|S,P/lvkSG#d)׆gx_橜[u/OsGB鳧Bpoڰ|αLƞ-cѓ? 7}ukBr~dR*ď?zJ<˚qd&595VF'β!d?uFiH dy(9\-G=ȤTxH^z,pEvBgI/PF@$#/G8|>ʑeNn^Kk>!K 8Bg !WZ5i^c?CB!YdYRȒY MY=i{5_i"ݖ6]l%dxLA8"[^Ikq5mԜFyN%.:sqM[Ǵ21?{ e44fY?,H dDG=ȤTȒ1}QSēbAGaEPيUxϨhk$KDxn\Lğʘ1:.[8| ̸Hl!S?m\*Z[% IEּ[4YK5j'Bt|wXdB= Yn,`URWʖ.'*P,JRZd-j׬kHXx@kbcR}PAzcqZ*dIz]6-9^c,u?uFiH dy(9YdR"dԨJ:APr+[ַ=5jֆB)N%oks,|qqѡd{l5f{[ю- ڿg'EQ=m^4ʽ?]|`,v=b08vX#Oāuϒnw-dSE`}C.SINiګ%ٿ&M6-'qB o(F)\F{\ >zH#\ʵ+cdqL&#ɟJ%ЌݺsKgAKZzW쥉SGhr4ʕ3I HcERЛ:q&輾f)u-T}A-)dI7F_C1x\XiX%Y:OݨQYic1tN9u Q2 Y|ak,~9SfW&Iw8C.k21_M9Cwn^ﶈ uâsρI.GQéR/ga&E&}Y ߴ9+a6&g-TZ:~@}d(9Ȭ,Z$/I+Y n#ris{|UȒ 3'&<{=Y #\e-FR̙Ug-dk1)&%'xfNS_UPEkI:1X$1&dISIrMQђו\wn 'k Yᬅa:ăFz'Zk,9)fBNpe-ko߁4+n^yL9,un( i1E:'!Kpԃ1!{n WHz!FP#5R`1- .*~e"ǍO!ܾIQr {4>&Qj/!%orD?x m:̄RxHd~̸Ign\L^Cm;e8׶Mk(2dsXL"ΓWY߻C~ԺCwMǏ`WvZ*"i(r V s#ACxк;x{ٲkڟ# B ;JDZDaй}Wpq f΋*,9u͚@;$auυK)8<~V}y si5)<:spDZv?R8+"8JcciEM4aT Aў~AP B]xDj\,a+̌}RKѸ "C#?>jfok Y˕4ύ]8GFRn}){Dl9%lXE9+ca9H́n@.ZrENv-'hDXEY@:OݨQYic1tNBe (.#FSB 3>h Bd)Sҩ{?iıغe[xktu*Is&Qth^ )W]Wڔ7Q?X׵#|ܼ~ S""?N9aK/g!2,LAU^t WA>>r I2f˗sf?x,X0{wnS-Z(;c1X} "J@>z}sF'N^Ofex׆k?d=9B c߭eRF)'dooc)pڴ[Lrk:}(OٵFwu!۾Bc4>+UVsoL86@ IDATqɾߴ8*iDzLKDQ5kV߾a,dm>wnוvAx["K-XܸuFPdnzRú%|ԌWX^:O߬\L[vnk;Q]4"",uPn( i1E:'!KAF?vL Lh#B= Y?uPlIB-m{YhVE6$Uh4j޺蚤'2; 2e })E @2pSA dْ> ijpZTlCnK D*UpbwϞQ."y lSuM!K4aԭCi%YAB"gaZdېgT&6YM!ˬiE!@2R2J- @Ȳ%} "`9 j c58X̧#2? Pony{ dh:2@ @J,=J- @Ȳ%} "`9 j c5N?YV_h-Y?uPlIB-m{YhVE `uAȲ@ n @R78 `KlImEBôZ58X -*B՗tKBO?[eKh,D/rժAjhQ1X[M=4ؒ,[G `!~0V 2VCA}e%@ nqSA dْ> ijpZT V'`,/4,uS:~( $!˖6X_,4LUТb:{ dY} H dzCi%YAB"gaZd !K @%!KJ- @Ȳ%} "`9 j c5N?YV_h-Y?uPlIB-m{YhVE `uAȲ@ n @R78 `KlImEBôZ58X -*B՗tKBO?[H"dٲ3h@@UNK˃L:> i"dEA@:%0,{:aJ=xuҮ#"gR$25 !˚A,qC)'iF'`B@@@-Yv1pke-@@@ W   `q, ! @@@,LB:xKBX, Eu    ! k@@@CBuVEX, Eu    ,,0Y@@@@BX,pE    ,0Y@@@@-YX    `a, Ձ,YZA@@@YX    `a, Ձ[@@@@ dY(YX    `,@@@@ dY( da) @Ȳ0PT   @@@@: dY+jXda  @Ȳ0PT   o @R ea@@@ da u@ȲW     ea@@@   &!@Q@e@@@`5   &!@Q%! K@@@,LB:5   !!:\Q+" k@@@,LB:xKBX, Eu    ! k@@@CBuVEX, Eu    ,,0Y@@@@BX,pE    ,0Y@@@ޝGGQy$+&>è$(Ae,@4$a QbЀ0"/^(^\`@*ʢQyc$Iw*U ].Y<   @ d50(! Y<  5Yָ*  ,@@@@x@@ j`PC@x@@kqU@om@fVd\G@ d!@:2 o dy@, Ȳ&@0ɝ5@h W#Ȋ:}! ']# ]_f rn9Y<  EY, `R $ #`At  rw}8W ˹cd   dYK I,܎Y65 [ ev \,֎#@3 X$@e,"&Lr;6 dوO  n,wח!s[;FA  `AE4 2 (@e#>]# ]_f rn9Y<  EY, `R $ #`At  rw}8W ˹cd   dYK I,܎Y65 [ ev \,֎#@3 X$@e,"&Lr;6 dوO  n,wח!s[;FA  `AE4 2 (@e#>]# ]_f rn9Y<  EY, `R $ #`At  rw}8W ˹cd   dYK I,܎Y65 [ ev \,֎#@3 X$@e,"&Lr;6 dوO  n,wח!@*++%!!tY ilc@pA+@ !C瓤zwOUo:nDv,K@@Yn,B;Tddeee+Rg{$u@x׎eOh*mo u)w+ _`=}oر_;{5X4$r??Ėu"!T, DS +^PӭOEiIL-NI?sVʄQR`/LS[MxΘ*+n;zʇ Sf  MhjxIX8H-/Yj3EU'OH٢"Yr4<=\w# 4a,Ӓ5XP+s|Y$  Y,ϖ#WeEhd,ڪVh^壶^lYj K3gΜ@@ dy'lUVAV:v,M)Yrs[󢼵/ͺ+kw?h\?>$oozJGdLTm/ܧJq^_'*d-v;Jҥ[9sM?1n{eUxDp⎻zKN~\x%<)/#ׯm?2^Z]wCmjj믾,w~Wuvyh4$E eëkCۤwdYƙg^ھpEp  AV`\D P۪-*92ӗ.gȄ'%..N_l?BW͚_!'Ov#?Tْxȏ!vlסu7$M/HtH C2˗.kV=/8\Fi?> _:uJKgq~)(w}<6z=rJeY*[rQ}^{}J=OoNByLU53kک{{ƤM#'H\ƺ#JHh*Ce.ÇcO+ynj_B!):L‡ Uf  `A-tkUV@ +C[ew]=&ˡJ!ɞ:QTFt#|TNJꡌ|9߅ZXBZ*CGHJF.(2:ug#ݓ):h-X$ əWO?3?7,5~Tش1Nq^Ji}c;=wU˓^}a*zۻT ԛ&՘2 tHvI9Q#mBzW=SE@h 45VA"o;KOw|r'OXݣ&m"z*tӫο@ fdJ6dUa͸yD=Ym/To{2{So]|iK?|PN%~*ŋ_kZ]^Eml/E%/;omLAՔBZ#,=   @ DPX>OԊ,.d8A֯υ{۶Ȼo \In{F9Og ]gIUyum`\Q叝эqo:r^!xq) #"Ȋ_ @@@6p{ BW$eGϚ9Od=*+C-aPۛ;"K]i{FodƴVe d0l|bKM[ Y+nݯ>Aj7d^I{I@@@k5V k~` t3 S&d暑ʧkokī< u{} 6Mշ d O}dz~A k6ͷ<_>AV^YY>\   Vc`ddv,;?. _ndj̬IOUtO? K.VdշP^`cll+ܲ́jjCG=Vpvczs〇G.r_DN:=ܷ7YG(="lgdն%l- :@@@ '" d?Oyv~ݽ{ַ/3̴6IWn٪=FP]=M1:ut[ OdSs&0&m.ܷkQ{' ^q @@@+ fB/~*ede5ΈRMɜ@ TS{_j3kmyq`GmS4^D#/'CnwL̕fͯe.XRRB$UROmTaV-FVsD:Ӳ1Nލ-{v _;P>`=<¼rQ>Y^6sE@@.`ƪO ];ꬼ _rJ|G:\>|o];rz{٥"^EÓ{?3t!FɾW{M@E3S/>^#ҩM-plsTd>LIޤ3d4k({v@y S;mjëkLc͚_WI"CGN}d?y>Y^2sD@@\ Vc IRRRgŷ TrczQ﮲].xWdLR02Y2ltqɓ̈́s*7!֧ VfmڰN^Z^.۷n_~>w?<xZVW3U&1VRvWyz^Y*j:yiټ5IHh^Ǫ滪\ܸzvg/=.R,08 gEֱ_ e%g:#,...&O2@@@`z^ Lcр_SgMO   1 @EpbhYQF   %(A((AVlԁQ   DI +J. +6 Ju`   Q Ȋ˺!ȊdF   @M7X @e*M"  ĮAVֆ!PAV]B|   *,WxL cg   0' d9z@@@ bɸ ȊR0@@@hdEC>F WZE@@Q- B 0@@@=Y%3AjΌ@@@O dyLY/ G@@L +2/F b@@@r,ˉ,a@@@X ȊŪ0& ω@@@\"@B2 O dyL@@Awk̝/@2@@@"RbL + p@@@ Ȳ֗R J]F@@9+ B l@@@ Yn"sAW+ϼ@@@ dyLY(#@@@W +\)C b&@@@B, qi,y@@@ Ȋz0" ȊDk@fCIDAT@@/@2 dyL@@AΜ"@J2@@@b"bR +& @@@0,ڧ]^3gΜz@@@@^,{TmhDIENDB`labelled/vignettes/packed_columns.Rmd0000644000176200001440000000464014444527456017504 0ustar liggesusers--- author: "Joseph Larmarange" title: "Variables labels and packed columns" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Variables labels and packed columns} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The **tidyr** allows to group several columns of a tibble into one single df-column, see `tidyr::pack()`. Such df-column is itself a tibble. It's not currently clear why you would ever want to pack columns since few functions work with this sort of data. ```{r} library(tidyr) d <- iris %>% as_tibble() %>% pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) str(d) class(d$Sepal) ``` Regarding variable labels, you may want to define a label for one sub-column of a df-column, or eventually a label for the df-column itself. For a sub-column, you could use easily `var_label()` to define your label. ```{r} library(labelled) var_label(d$Sepal$Length) <- "Length of the sepal" str(d) ``` But you cannot use directly `var_label()` for the df-column. ```{r} var_label(d$Petal) <- "wrong label for Petal" str(d) ``` As `d$Petal` is itself a tibble, applying `var_label()` on it would have an effect on each sub-column. To change a variable label to the df-column itself, you could use `label_attribute()`. ```{r} label_attribute(d$Petal) <- "correct label for Petal" str(d) ``` On the other hand, `set_variable_labels()` works differently, as the primary intention of this function is to work on the columns of a tibble. ```{r} d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") str(d) ``` This is equivalent to: ```{r} var_label(d) <- list(Sepal = "Label of the Sepal df-column") str(d) ``` To use `set_variable_labels()` on sub-columns, you should use this syntax: ```{r} d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) str(d) ``` If you want to get the list of variable labels of a tibble, by default `var_label()` or `get_variable_labels()` will return the labels of the first level of columns. ```{r} d %>% get_variable_labels() ``` To obtain the list of variable labels for sub-columns, you could use `recurse = TRUE`: ```{r} d %>% get_variable_labels(recurse = TRUE) d %>% get_variable_labels( recurse = TRUE, null_action = "fill", unlist = TRUE ) ``` labelled/vignettes/look_for.Rmd0000644000176200001440000001150614357761455016330 0ustar liggesusers--- author: "Joseph Larmarange" title: "Generate a data dictionnary and search for variables with `look_for()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Generate a data dictionnary and search for variables with `look_for()`} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ## Showing a summary of a data frame ### Default printing of tibbles It is a common need to easily get a description of all variables in a data frame. When a data frame is converted into a tibble (e.g. with `dplyr::as_tibble()`), it as a nice printing showing the first rows of the data frame as well as the type of column. ```{r message=FALSE} library(dplyr) ``` ```{r} iris %>% as_tibble() ``` However, when you have too many variables, all of them cannot be printed and their are just listed. ```{r} data(fertility, package = "questionr") women ``` Note: in **R** console, value labels (if defined) are usually printed but they do not appear in a R markdown document like this vignette. ### `dplyr::glimpse()` The function `dplyr::glimpse()` allows you to have a quick look at all the variables in a data frame. ```{r} glimpse(iris) glimpse(women) ``` It will show you the first values of each variable as well as the type of each variable. However, some important informations are not displayed: - variable labels, when defined; - value labels for labelled vectors; - the list of levels for factors; - the range of values for numerical variables. ### `labelled::look_for()` `look_for()` provided by the `labelled` package will print in the console a data dictionary of all variables, showing variable labels when available, the type of variable and a list of values corresponding to: - levels for factors; - value labels for labelled vectors; - the range of observed values in the vector otherwise (if `details = "full"`). ```{r} library(labelled) look_for(iris) look_for(women) ``` Note that `lookfor()` and `generate_dictionary()` are synonyms of `look_for()` and works exactly in the same way. If there is not enough space to print full labels in the console, they will be truncated (truncation is indicated by a `~`). ## Searching variables by key When a data frame has dozens or even hundreds of variables, it could become difficult to find a specific variable. In such case, you can provide an optional list of keywords, which can be simple character strings or regular expression, to search for specific variables. ```{r} # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") # Look_for will take variable labels into account look_for(women, "read", "level") ``` By default, `look_for()` will look through both variable names and variables labels. Use `labels = FALSE` to look only through variable names. ```{r} look_for(women, "read") look_for(women, "read", labels = FALSE) ``` Similarly, the search is by default case insensitive. To make the search case sensitive, use `ignore.case = FALSE`. ```{r} look_for(iris, "sepal") look_for(iris, "sepal", ignore.case = FALSE) ``` ## Level of details If you just want to use the search feature of `look_for()` without computing the details of each variable, simply indicate `details = "none"` or `details = FALSE`. ```{r} look_for(women, "id", details = "none") ``` If you want more details (but can be time consuming for big data frames), indicate `details = "full"` or `details = TRUE`. ```{r} look_for(women, details = "full") look_for(women, details = "full") %>% dplyr::glimpse() ``` ## Advanced usages of `look_for()` `look_for()` returns a detailed tibble which is summarized before printing. To deactivate default printing and see full results, simply use `dplyr::as_tibble()`, `dplyr::glimpse()` or even `utils::View()`. ```{r, eval=FALSE} look_for(women) %>% View() ``` ```{r} look_for(women) %>% as_tibble() glimpse(look_for(women)) ``` The tibble returned by `look_for()` could be easily manipulated for advanced programming. When a column has several values for one variable (e.g. `levels` or `value_labels`), results as stored with nested named list. You can convert named lists into simpler character vectors, you can use `convert_list_columns_to_character()`. ```{r} look_for(women) %>% convert_list_columns_to_character() ``` Alternatively, you can use `lookfor_to_long_format()` to transform results into a long format with one row per factor level and per value label. ```{r} look_for(women) %>% lookfor_to_long_format() ``` Both can be combined: ```{r} look_for(women) %>% lookfor_to_long_format() %>% convert_list_columns_to_character() ``` labelled/vignettes/missing_values.Rmd0000644000176200001440000001445514357761455017554 0ustar liggesusers--- author: "Joseph Larmarange" title: "About missing values: regular NAs, tagged NAs and user NAs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{About missing values: regular NAs, tagged NAs and user NAs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- In base **R**, missing values are indicated using the specific value `NA`. **Regular NAs** could be used with any type of vector (double, integer, character, factor, Date, etc.). Other statistical software have implemented ways to differentiate several types of missing values. **Stata** and **SAS** have a system of **tagged NAs**, where NA values are tagged with a letter (from a to z). **SPSS** allows users to indicate that certain non-missing values should be treated in some analysis as missing (**user NAs**). The `haven` package implements **tagged NAs** and **user NAs** in order to keep this information when importing files from **Stata**, **SAS** or **SPSS**. ```{r} library(labelled) ``` ## Tagged NAs ### Creation and tests **Tagged NAs** are proper `NA` values with a tag attached to them. They can be created with `tagged_na()`. The attached tag should be a single letter, lowercase (a-z) or uppercase (A-Z). ```{r} x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) ``` For most **R** functions, tagged NAs are just considered as regular NAs. By default, they are just printed as any other regular NA. ```{r} x is.na(x) ``` To show/print their tags, you need to use `na_tag()`, `print_tagged_na()` or `format_tagged_na()`. ```{r} na_tag(x) print_tagged_na(x) format_tagged_na(x) ``` To test if a certain NA is a regular NA or a tagged NA, you should use `is_regular_na()` or `is_tagged_na()`. ```{r} is.na(x) is_tagged_na(x) # You can test for specific tagged NAs with the second argument is_tagged_na(x, "a") is_regular_na(x) ``` Tagged NAs could be defined **only** for double vectors. If you add a tagged NA to a character vector, it will be converted into a regular NA. If you add a tagged NA to an integer vector, the vector will be converted into a double vector. ```{r, error=TRUE} y <- c("a", "b", tagged_na("z")) y is_tagged_na(y) format_tagged_na(y) z <- c(1L, 2L, tagged_na("a")) typeof(z) format_tagged_na(z) ``` ### Unique values, duplicates and sorting with tagged NAs By default, functions such as `base::unique()`, `base::duplicated()`, `base::order()` or `base::sort()` will treat tagged NAs as the same thing as a regular NA. You can use `unique_tagged_na()`, `duplicated_tagged_na()`, `order_tagged_na()` and `sort_tagged_na()` as alternatives that will treat two tagged NAs with different tags as separate values. ```{r} x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x %>% print_tagged_na() unique(x) %>% print_tagged_na() unique_tagged_na(x) %>% print_tagged_na() duplicated(x) duplicated_tagged_na(x) sort(x, na.last = TRUE) %>% print_tagged_na() sort_tagged_na(x) %>% print_tagged_na() ``` ### Tagged NAs and value labels It is possible to define value labels for tagged NAs. ```{r} x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) x ``` When converting such labelled vector into factor, tagged NAs are, by default, converted into regular NAs (it is not possible to define tagged NAs with factors). ```{r} to_factor(x) ``` However, the option `explicit_tagged_na` of `to_factor()` allows to transform tagged NAs into explicit factor levels. ```{r} to_factor(x, explicit_tagged_na = TRUE) to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE) ``` ### Conversion into user NAs Tagged NAs can be converted into user NAs with `tagged_na_to_user_na()`. ```{r} tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) ``` Use `tagged_na_to_regular_na()` to convert tagged NAs into regular NAs. ```{r} tagged_na_to_regular_na(x) tagged_na_to_regular_na(x) %>% is_tagged_na() ``` ## User NAs `haven` introduced an `haven_labelled_spss` class to deal with user defined missing values in a similar way as **SPSS**. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. ### Creation User NAs could be created directly with `labelled_spss()`. You can also manipulate them with `na_values()` and `na_range()`. ```{r} v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 v na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` NB: you cant also use `set_na_range()` and `set_na_values()` for a `dplyr`-like syntax. ```{r} library(dplyr) # setting value labels and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% set_value_labels(s2 = c(yes = 1, no = 2)) %>% set_na_values(s2 = 9) df$s2 # removing user NAs df <- df %>% set_na_values(s2 = NULL) df$s2 ``` ### Tests Note that `is.na()` will return `TRUE` for user NAs. Use `is_user_na()` to test if a specific value is a user NA and `is_regular_na()` to test if it is a regular NA. ```{r} v is.na(v) is_user_na(v) is_regular_na(v) ``` ### Conversion For most **R** functions, user NAs values are **still** regular values. ```{r} x <- c(1:5, 11:15) na_range(x) <- c(10, Inf) val_labels(x) <- c("dk" = 11, "refused" = 15) x mean(x) ``` You can convert user NAs into regular NAs with `user_na_to_na()` or `user_na_to_regular_na()` (both functions are identical). ```{r} user_na_to_na(x) mean(user_na_to_na(x), na.rm = TRUE) ``` Alternatively, if the vector is numeric, you can convert user NAs into tagged NAs with `user_na_to_tagged_na()`. ```{r} user_na_to_tagged_na(x) mean(user_na_to_tagged_na(x), na.rm = TRUE) ``` Finally, you can also remove user NAs definition without converting these values to `NA`, using `remove_user_na()`. ```{r} remove_user_na(x) mean(remove_user_na(x)) ``` labelled/vignettes/approaches.drawio0000644000176200001440000000270014357761455017402 0ustar liggesusers7VlNc5swEP01PqZjIP46+iNOM01m0ubQppeODAsoFVpGyDbk11dgYcDErps4CY57SbRPK4He29VKuGWNg/hSkNC/QQdYy2w7ccuatEzTaPcH6l+KJCtk0NWAJ6ijnQrgjj5CPlKjc+pAVHGUiEzSsArayDnYsoIRIXBZdXORVZ8aEg9qwJ1NWB39Th3pr9C+2Svwz0A9P3+yka8vILmzXknkEweXJci6aFljgShXrSAeA0vJy3lZjZtu6V2/mAAu9xlw05ldX329nRpB+CWaX0WTQV+edfS7ySRfMDhq/dpEIX30kBN2UaAjgXPuQDprW1mFzzViqEBDgQ8gZaLFJHOJCvJlwHQvxFT+KLXv06k+dbQ1ifXMmZHkBpci+VE2SqNSsxiWWfm4OkuauAjnwoYd1OTRRoQHcoefufJLeSs9QGtwCRiAeh/lIIARSRfVuCI6PL21X6GgamgR/0FQPe+CsLl+0oRIopCrIFRS1eSuirn0qYS7kGTELFVGV4VzKWNjZCiysZZDoO/aCo+kwN9Q6unafZi5u+hfgJAQ7yRM957r/Emq5rLIRiNPMb+UibnfwRnuHkvKPD/0zT1D32pU6Ju10B8jV3EWUeRZ2WilcdJlakWjmVAtL225xJYooqxvqv7yeQCC2gdNFDCcDvSeSpRBt2eR7mESxWw3LFN6Hz9TrD0z5bxRmWJtKxI2A8Ip9+ppoh0E2OhkDkdXR6xuw9LjfJsKRMV7EtHoCEnumA0j2fh/wt3KzWDPzctoVp0ffPQj7jpD3i9rjqZ0v8O9cN+LodGsom/Ur4andT7ePAA0IM2O5i75Dmm279E6j+u3T7PHe2/xazxdkN5DspjNgntkg/wDTBMk7b2mpht0H05kanozJ7bFz6V70x59c70lnp29VNJs6FAIkpQcQqRcRqWZb1OgtGFs1uXOxhfWv/jnF/IinFZvUATXeikv2ENO8jK3+a2jAXv5KVznGkBzv0bzMAwFEttX6LDGsVqorBJZJYwjhw12NUQY9bgyGbjpDClp1CZsqOGAOk62eT+lW1VZF7nMC8D2zfLZ14V9NTFfTZP6jaykyegUNTHabyjK9or51HYE8ZHekmubUf/1NiNlFr/Mrkp18fu2dfEHlabelled/R/0000755000176200001440000000000014444527456012236 5ustar liggesuserslabelled/R/var_label.R0000644000176200001440000002026614444541051014301 0ustar liggesusers#' Get / Set a variable label #' #' @param x a vector or a data.frame #' @param value a character string or `NULL` to remove the label #' For data frames, with `var_labels()`, it could also be a named list or a #' character vector of same length as the number of columns in `x`. #' @param unlist for data frames, return a named vector instead of a list #' @param null_action for data frames, by default `NULL` will be returned for #' columns with no variable label. Use `"fill"` to populate with the column name #' instead, or `"skip"` to remove such values from the returned list. #' @param recurse if `TRUE`, will apply `var_label()` on packed columns #' (see [tidyr::pack()]) to return the variable labels of each sub-column; #' otherwise, the label of the group of columns will be returned. #' @details #' `get_variable_labels()` is identical to `var_label()`. #' #' For data frames, if you are using `var_label()<-` and if `value` is a #' named list, only elements whose name will match a column of the data frame #' will be taken into account. If `value` is a character vector, labels should #' be in the same order as the columns of the data.frame. #' #' If you are using `label_attribute()<-` or `set_label_attribute()` on a data #' frame, the label attribute will be attached to the data frame itself, not #' to a column of the data frame. #' #' If you are using packed columns (see [tidyr::pack()]), please read the #' dedicated vignette. #' @examples #' var_label(iris$Sepal.Length) #' var_label(iris$Sepal.Length) <- 'Length of the sepal' #' \dontrun{ #' View(iris) #' } #' # To remove a variable label #' var_label(iris$Sepal.Length) <- NULL #' # To change several variable labels at once #' var_label(iris) <- c( #' "sepal length", "sepal width", "petal length", #' "petal width", "species" #') #' var_label(iris) #' var_label(iris) <- list( #' Petal.Width = "width of the petal", #' Petal.Length = "length of the petal", #' Sepal.Width = NULL, #' Sepal.Length = NULL #' ) #' var_label(iris) #' var_label(iris, null_action = "fill") #' var_label(iris, null_action = "skip") #' var_label(iris, unlist = TRUE) #' # #' @export var_label <- function(x, ...) { rlang::check_dots_used() UseMethod("var_label") } var_label_no_check <- function(x, ...) { UseMethod("var_label") } #' @export var_label.default <- function(x, ...) { attr(x, "label", exact = TRUE) } #' @rdname var_label #' @export var_label.data.frame <- function(x, unlist = FALSE, null_action = c("keep", "fill", "skip"), recurse = FALSE, ...) { if (recurse) { r <- lapply( x, var_label_no_check, unlist = unlist, null_action = null_action, recurse = TRUE ) } else { r <- lapply(x, label_attribute) } null_action <- match.arg(null_action) if (null_action == "fill") { r <- mapply( function(l, n) { if (is.null(l)) n else l }, r, names(r), SIMPLIFY = FALSE ) } if (null_action == "skip") { r <- r[!sapply(r, is.null)] } if (unlist) { r <- lapply( r, function(x) { if (is.null(x)) "" else x } ) r <- base::unlist(r, use.names = TRUE) } r } #' @rdname var_label #' @export `var_label<-` <- function(x, value) { UseMethod("var_label<-") } #' @export `var_label<-.default` <- function(x, value) { label_attribute(x) <- value x } #' @export `var_label<-.data.frame` <- function(x, value) { if ( (!is.character(value) && !is.null(value)) && !is.list(value) || (is.character(value) && length(value) > 1 && length(value) != ncol(x)) ) stop( paste0( "`value` should be a named list, NULL, a single character string or a ", "character vector of same length than the number of columns in `x`" ), call. = FALSE, domain = "R-labelled") if (is.character(value) && length(value) == 1) { value <- as.list(rep(value, ncol(x))) names(value) <- names(x) } if (is.character(value) && length(value) == ncol(x)) { value <- as.list(value) names(value) <- names(x) } if (is.null(value)) { value <- as.list(rep(1, ncol(x))) names(value) <- names(x) value <- lapply(value, function(x) { x <- NULL }) } if (!all(names(value) %in% names(x))) { missing_names <- stringr::str_c( setdiff(names(value), names(x)), collapse = ", " ) stop("some variables not found in x:", missing_names) } value <- value[names(value) %in% names(x)] for (var in names(value)) label_attribute(x[[var]]) <- value[[var]] x } #' @rdname var_label #' @export get_variable_labels <- var_label #' @rdname var_label #' @param .data a data frame or a vector #' @param ... name-value pairs of variable labels (see examples) #' @param .labels variable labels to be applied to the data.frame, #' using the same syntax as `value` in `var_label(df) <- value`. #' @param .strict should an error be returned if some labels #' doesn't correspond to a column of `x`? #' @note #' `set_variable_labels()` could be used with \pkg{dplyr} syntax. #' @return #' `set_variable_labels()` will return an updated copy of `.data`. #' @examples #' if (require(dplyr)) { #' # adding some variable labels #' df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% #' set_variable_labels(s1 = "Sex", s2 = "Yes or No?") #' var_label(df) #' #' # removing a variable label #' df <- df %>% set_variable_labels(s2 = NULL) #' var_label(df$s2) #' #' # Set labels from dictionary, e.g. as read from external file #' # One description is missing, one has no match #' description = tibble( #' name = c( #' "Sepal.Length", "Sepal.Width", "Petal.Length", "Petal.Width", #' "Something"), #' label = c( #' "Sepal length", "Sepal width", "Petal length", "Petal width", #' "something") #' ) #' var_labels <- setNames(as.list(description$label), description$name) #' iris_labelled <- iris %>% #' set_variable_labels(.labels = var_labels, .strict = FALSE) #' var_label(iris_labelled) #' #' # defining variable labels derived from variable names #' if (require(snakecase)) { #' iris <- iris %>% #' set_variable_labels(.labels = to_sentence_case(names(iris))) #' var_label(iris) #' } #' #' # example with a vector #' v <- 1:5 #' v <- v %>% set_variable_labels("a variable label") #' v #' v %>% set_variable_labels(NULL) #' } #' @export set_variable_labels <- function(.data, ..., .labels = NA, .strict = TRUE) { # not a data.frame if (!is.data.frame(.data)) { if (!identical(.labels, NA)) { label_attribute(.data) <- .labels } else { label_attribute(.data) <- unname(unlist(rlang::dots_list(...))) } return(.data) } # data.frame case if (!identical(.labels, NA)) { if (!.strict) .labels <- .labels[intersect(names(.labels), names(.data))] var_label(.data) <- .labels } values <- rlang::dots_list(...) if (length(values) > 0) { if (.strict && !all(names(values) %in% names(.data))) { missing_names <- stringr::str_c( setdiff(names(values), names(.data)), collapse = ", " ) stop("some variables not found in .data: ", missing_names) } for (v in intersect(names(values), names(.data))) label_attribute(.data[[v]]) <- values[[v]] } .data } #' @rdname var_label #' @export label_attribute <- function(x) { attr(x, "label", exact = TRUE) } #' @rdname var_label #' @export get_label_attribute <- function(x) { label_attribute(x) } #' @rdname var_label #' @export set_label_attribute <- function(x, value) { if ((!is.character(value) && !is.null(value)) || length(value) > 1) stop( "`value` should be a single character string or NULL", call. = FALSE, domain = "R-labelled" ) attr(x, "label") <- value x } #' @rdname var_label #' @export `label_attribute<-` <- set_label_attribute labelled/R/copy_labels.R0000644000176200001440000000567114357761455014670 0ustar liggesusers#' Copy variable and value labels and SPSS-style missing value #' #' This function copies variable and value labels (including missing values) #' from one vector to another or from one data frame to another data frame. #' For data frame, labels are copied according to variable names, and only #' if variables are the same type in both data frames. #' #' Some base \R functions like [base::subset()] drop variable and #' value labels attached to a variable. `copy_labels` could be used #' to restore these attributes. #' #' `copy_labels_from` is intended to be used with \pkg{dplyr} syntax, #' see examples. #' #' @param from A vector or a data.frame (or tibble) to copy labels from. #' @param to A vector or data.frame (or tibble) to copy labels to. #' @param .strict When `from` is a labelled vector, `to` have to be of the same #' type (numeric or character) in order to copy value labels and SPSS-style #' missing values. If this is not the case and `.strict = TRUE`, an error #' will be produced. If `.strict = FALSE`, only variable label will be #' copied. #' @export #' @examples #' library(dplyr) #' df <- tibble( #' id = 1:3, #' happy = factor(c('yes', 'no', 'yes')), #' gender = labelled(c(1, 1, 2), c(female = 1, male = 2)) #' ) %>% #' set_variable_labels( #' id = "Individual ID", #' happy = "Are you happy?", #' gender = "Gender of respondent" #' ) #' var_label(df) #' fdf <- df %>% filter(id < 3) #' var_label(fdf) # some variable labels have been lost #' fdf <- fdf %>% copy_labels_from(df) #' var_label(fdf) #' #' # Alternative syntax #' fdf <- subset(df, id < 3) #' fdf <- copy_labels(from = df, to = fdf) copy_labels <- function(from, to, .strict = TRUE) { UseMethod("copy_labels") } #' @export copy_labels.default <- function(from, to, .strict = TRUE) { if (!is.atomic(from)) stop("`from` should be a vector or a data.frame", call. = FALSE, domain = "R-labelled") if (!is.atomic(to)) stop("`to` should be a vector", call. = FALSE, domain = "R-labelled") var_label(to) <- var_label(from) to } #' @export copy_labels.haven_labelled <- function(from, to, .strict = TRUE) { if (mode(from) != mode(to) && .strict) stop("`from` and `to` should be of same type", call. = FALSE, domain = "R-labelled") var_label(to) <- var_label(from) if (mode(from) == mode(to)) { val_labels(to) <- val_labels(from) na_range(to) <- na_range(from) na_values(to) <- na_values(from) } to } #' @export copy_labels.data.frame <- function(from, to, .strict = TRUE) { if (!is.data.frame(to)) stop("`to` should be a data frame", call. = FALSE, domain = "R-labelled") for (var in names(to)) if (var %in% names(from)) to[[var]] <- copy_labels(from[[var]], to[[var]], .strict = .strict) to } #' @rdname copy_labels #' @export copy_labels_from <- function(to, from, .strict = TRUE) { copy_labels(from, to, .strict = .strict) } labelled/R/na_values.R0000644000176200001440000002660214444527456014344 0ustar liggesusers#' Get / Set SPSS missing values #' #' @param x A vector (or a data frame). #' @param value A vector of values that should also be considered as missing #' (for `na_values`) or a numeric vector of length two giving the (inclusive) #' extents of the range (for `na_values`, use `-Inf` and `Inf` if you #' want the range to be open ended). #' @details #' See [haven::labelled_spss()] for a presentation of SPSS's user defined #' missing values. #' #' Note that [base::is.na()] will return `TRUE` for user defined missing values. #' It will also return `TRUE` for regular `NA` values. If you want to test if a #' specific value is a user NA but not a regular `NA`, use `is_user_na()`. #' If you want to test if a value is a regular `NA` but not a user NA, not a #' tagged NA, use `is_regular_na()`. #' #' You can use [user_na_to_na()] to convert user defined missing values to #' regular `NA`. Note that any value label attached to a user defined missing #' value will be lost. #' [user_na_to_regular_na()] is a synonym of [user_na_to_na()]. #' #' The method [user_na_to_tagged_na()] will convert user defined missing values #' into [haven::tagged_na()], preserving value labels. Please note that #' [haven::tagged_na()] are defined only for double vectors. Therefore, integer #' `haven_labelled_spss` vectors will be converted into double `haven_labelled` #' vectors; and [user_na_to_tagged_na()] cannot be applied to a character #' `haven_labelled_spss` vector. #' #' [tagged_na_to_user_na()] is the opposite of [user_na_to_tagged_na()] and #' convert tagged `NA` into user defined missing values. #' #' @return #' `na_values()` will return a vector of values that should also be #' considered as missing. #' `na_range()` will return a numeric vector of length two giving the #' (inclusive) extents of the range. #' @seealso [haven::labelled_spss()], [user_na_to_na()] #' @examples #' v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) #' v #' na_values(v) <- 9 #' na_values(v) #' v #' #' is.na(v) # TRUE for the 6th and 10th values #' is_user_na(v) # TRUE only for the 6th value #' #' user_na_to_na(v) #' na_values(v) <- NULL #' v #' na_range(v) <- c(5, Inf) #' na_range(v) #' v #' user_na_to_na(v) #' user_na_to_tagged_na(v) #' #' # it is not recommended to mix user NAs and tagged NAs #' x <- c(NA, 9, tagged_na("a")) #' na_values(x) <- 9 #' x #' is.na(x) #' is_user_na(x) #' is_tagged_na(x) #' is_regular_na(x) #' #' @export na_values <- function(x) { UseMethod("na_values") } #' @export na_values.default <- function(x) { # return nothing NULL } #' @export na_values.haven_labelled_spss <- function(x) { attr(x, "na_values", exact = TRUE) } #' @export na_values.data.frame <- function(x) { lapply(x, na_values) } #' @rdname na_values #' @export `na_values<-` <- function(x, value) { UseMethod("na_values<-") } #' @export `na_values<-.default` <- function(x, value) { if (!is.null(value)) x <- labelled_spss( x, val_labels(x), na_values = value, na_range = attr(x, "na_range"), label = var_label(x) ) # else do nothing x } #' @export `na_values<-.factor` <- function(x, value) { if (!is.null(value)) stop("`na_values()` cannot be applied to factors.") x %>% remove_attributes("na_values") } #' @export `na_values<-.haven_labelled` <- function(x, value) { if (is.null(value)) { attr(x, "na_values") <- NULL if (is.null(attr(x, "na_range"))) { x <- labelled(x, val_labels(x), label = var_label(x)) } } else { x <- labelled_spss( x, val_labels(x), na_values = value, na_range = attr(x, "na_range"), label = var_label(x) ) } x } #' @export `na_values<-.data.frame` <- function(x, value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } if (!all(names(value) %in% names(x))) { missing_names <- stringr::str_c( setdiff(names(value), names(x)), collapse = ", " ) stop("some variables not found in x:", missing_names) } for (var in names(value)) if (!is.null(value[[var]])) { if (mode(x[[var]]) != mode(value[[var]])) stop("`x` and `value` must be same type", call. = FALSE, domain = "R-labelled") if (typeof(x[[var]]) != typeof(value[[var]])) mode(value[[var]]) <- typeof(x[[var]]) } for (var in names(value)) na_values(x[[var]]) <- value[[var]] x } #' @rdname na_values #' @export na_range <- function(x) { UseMethod("na_range") } #' @export na_range.default <- function(x) { # return nothing NULL } #' @export na_range.haven_labelled_spss <- function(x) { attr(x, "na_range", exact = TRUE) } #' @export na_range.data.frame <- function(x) { lapply(x, na_range) } #' @rdname na_values #' @export `na_range<-` <- function(x, value) { UseMethod("na_range<-") } #' @export `na_range<-.default` <- function(x, value) { if (!is.null(value)) x <- labelled_spss( x, val_labels(x), na_values = attr(x, "na_values"), na_range = value, label = var_label(x) ) # else do nothing x } #' @export `na_range<-.factor` <- function(x, value) { if (!is.null(value)) stop("`na_range()` cannot be applied to factors.") x %>% remove_attributes("na_range") } #' @export `na_range<-.haven_labelled` <- function(x, value) { if (is.null(value)) { attr(x, "na_range") <- NULL if (is.null(attr(x, "na_values"))) { x <- labelled(x, val_labels(x), label = var_label(x)) } } else { x <- labelled_spss( x, val_labels(x), na_values = attr(x, "na_values"), na_range = value, label = var_label(x) ) } x } #' @export `na_range<-.data.frame` <- function(x, value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } if (!all(names(value) %in% names(x))) { missing_names <- stringr::str_c( setdiff(names(value), names(x)), collapse = ", " ) stop("some variables not found in x:", missing_names) } for (var in names(value)) if (!is.null(value[[var]])) { if (mode(x[[var]]) != mode(value[[var]])) stop("`x` and `value` must be same type", call. = FALSE, domain = "R-labelled") if (typeof(x[[var]]) != typeof(value[[var]])) mode(value[[var]]) <- typeof(x[[var]]) } for (var in names(value)) na_range(x[[var]]) <- value[[var]] x } #' @rdname na_values #' @export get_na_values <- na_values #' @rdname na_values #' @export get_na_range <- na_range #' @rdname na_values #' @param .data a data frame or a vector #' @param ... name-value pairs of missing values (see examples) #' @param .values missing values to be applied to the data.frame, #' using the same syntax as `value` in `na_values(df) <- value` or #' `na_range(df) <- value`. #' @param .strict should an error be returned if some labels #' doesn't correspond to a column of `x`? #' @note #' `get_na_values()` is identical to `na_values()` and `get_na_range()` #' to `na_range()`. #' #' `set_na_values()` and `set_na_range()` could be used with \pkg{dplyr} #' syntax. #' @return #' `set_na_values()` and `set_na_range()` will return an updated #' copy of `.data`. #' @examples #' if (require(dplyr)) { #' # setting value label and user NAs #' df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% #' set_value_labels(s2 = c(yes = 1, no = 2)) %>% #' set_na_values(s2 = 9) #' na_values(df) #' #' # removing missing values #' df <- df %>% set_na_values(s2 = NULL) #' df$s2 #' #' # example with a vector #' v <- 1:10 #' v <- v %>% set_na_values(5, 6, 7) #' v #' v %>% set_na_range(8, 10) #' v %>% set_na_range(.values = c(9, 10)) #' v %>% set_na_values(NULL) #' } #' @export set_na_values <- function(.data, ..., .values = NA, .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) stop(".data should be a data.frame or a vector") # vector case if (is.atomic(.data)) { if (!identical(.values, NA)) { na_values(.data) <- .values } else { na_values(.data) <- unname(unlist(rlang::dots_list(...))) } return(.data) } # data.frame case if (!identical(.values, NA)) { if (!.strict) .values <- .values[intersect(names(.values), names(.data))] na_values(.data) <- .values } values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- stringr::str_c( setdiff(names(values), names(.data)), collapse = ", " ) stop("some variables not found in .data: ", missing_names) } for (v in intersect(names(values), names(.data))) na_values(.data[[v]]) <- values[[v]] .data } #' @rdname na_values #' @export set_na_range <- function(.data, ..., .values = NA, .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) stop(".data should be a data.frame or a vector") # vector case if (is.atomic(.data)) { if (!identical(.values, NA)) { na_range(.data) <- .values } else { na_range(.data) <- unname(unlist(rlang::dots_list(...))) } return(.data) } # data.frame case if (!identical(.values, NA)) { if (!.strict) .values <- .values[intersect(names(.values), names(.data))] na_range(.data) <- .values } values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) stop("some variables not found in .data") for (v in intersect(names(values), names(.data))) na_range(.data[[v]]) <- values[[v]] .data } # internal function to test if a value is user_na test_if_user_na <- function(val, na_values = NULL, na_range = NULL) { if (inherits(val, "haven_labelled")) val <- unclass(val) miss <- rep.int(FALSE, length(val)) if (!is.null(na_values)) { miss <- miss | val %in% na_values } if (!is.null(na_range) && is.numeric(val)) { miss <- miss | (val >= na_range[1] & val <= na_range[2] & !is.na(val)) } miss } #' @rdname na_values #' @export is_user_na <- function(x) { test_if_user_na(x, na_values(x), na_range(x)) } #' @rdname na_values #' @export is_regular_na <- function(x) { is.na(x) & !is_user_na(x) & !is_tagged_na(x) } #' @rdname na_values #' @export user_na_to_na <- function(x) { UseMethod("user_na_to_na") } #' @rdname na_values #' @export user_na_to_regular_na <- user_na_to_na #' @export user_na_to_na.default <- function(x) { # do nothing x } #' @export user_na_to_na.haven_labelled_spss <- function(x) { remove_user_na(x, user_na_to_na = TRUE) } #' @export user_na_to_na.data.frame <- function(x) { x[] <- lapply(x, user_na_to_na) x } #' @rdname na_values #' @export user_na_to_tagged_na <- function(x) { UseMethod("user_na_to_tagged_na") } #' @export user_na_to_tagged_na.default <- function(x) { # do nothing x } #' @export user_na_to_tagged_na.haven_labelled_spss <- function(x) { remove_user_na(x, user_na_to_tagged_na = TRUE) } #' @export user_na_to_tagged_na.data.frame <- function(x) { x[] <- lapply(x, user_na_to_tagged_na) x } labelled/R/lookfor.R0000644000176200001440000003124714361242251014024 0ustar liggesusers#' Look for keywords variable names and descriptions / Create a data dictionary #' #' `look_for` emulates the `lookfor` Stata command in \R. It supports #' searching into the variable names of regular \R data frames as well as into #' variable labels descriptions, factor levels and value labels. #' The command is meant to help users finding variables in large datasets. #' #' When no keyword is provided, it will produce a data dictionary of the overall #' data frame. #' #' @param data a data frame or a survey object #' @param ... optional list of keywords, a character string (or several #' character strings), which can be formatted as a regular expression suitable #' for a [base::grep()] pattern, or a vector of keywords; #' displays all variables if not specified #' @param labels whether or not to search variable labels (descriptions); #' `TRUE` by default #' @param values whether or not to search within values (factor levels or value #' labels); `TRUE` by default #' @param ignore.case whether or not to make the keywords case sensitive; #' `TRUE` by default (case is ignored during matching) #' @param details add details about each variable (full details could be time #' consuming for big data frames, `FALSE` is equivalent to `"none"` #' and `TRUE` to `"full"`) #' @param x a tibble returned by `look_for()` #' @return a tibble data frame featuring the variable position, name and #' description (if it exists) in the original data frame #' @details The function looks into the variable names for matches to the #' keywords. If available, variable labels are included in the search scope. #' Variable labels of data.frame imported with \pkg{foreign} or #' \pkg{memisc} packages will also be taken into account (see [to_labelled()]). #' If no keyword is provided, it will return all variables of `data`. #' #' `look_for()`, `lookfor()` and `generate_dictionary()` are equivalent. #' #' By default, results will be summarized when printing. To deactivate default #' printing, use `dplyr::as_tibble()`. #' #' `lookfor_to_long_format()` could be used to transform results with one row #' per factor level and per value label. #' #' Use `convert_list_columns_to_character()` to convert named list columns into #' character vectors (see examples). #' #' `look_for_and_select()` is a shortcut for selecting some variables and #' applying `dplyr::select()` to return a data frame with only the selected #' variables. #' #' @author François Briatte , #' Joseph Larmarange #' @examples #' look_for(iris) #' #' # Look for a single keyword. #' look_for(iris, "petal") #' look_for(iris, "s") #' iris %>% look_for_and_select("s") %>% head() #' #' # Look for with a regular expression #' look_for(iris, "petal|species") #' look_for(iris, "s$") #' #' # Look for with several keywords #' look_for(iris, "pet", "sp") #' look_for(iris, "pet", "sp", "width") #' look_for(iris, "Pet", "sp", "width", ignore.case = FALSE) #' #' # Look_for can search within factor levels or value labels #' look_for(iris, "vers") #' #' # Quicker search without variable details #' look_for(iris, details = "none") #' #' # To obtain more details about each variable #' look_for(iris, details = "full") #' #' # To deactivate default printing, convert to tibble #' look_for(iris, details = "full") %>% #' dplyr::as_tibble() #' #' # To convert named lists into character vectors #' look_for(iris) %>% convert_list_columns_to_character() #' #' # Long format with one row per factor and per value label #' look_for(iris) %>% lookfor_to_long_format() #' #' # Both functions can be combined #' look_for(iris) %>% #' lookfor_to_long_format() %>% #' convert_list_columns_to_character() #' #' # Labelled data #' d <- dplyr::tibble( #' region = labelled_spss( #' c(1, 2, 1, 9, 2, 3), #' c(north = 1, south = 2, center = 3, missing = 9), #' na_values = 9, #' label = "Region of the respondent" #' ), #' sex = labelled( #' c("f", "f", "m", "m", "m", "f"), #' c(female = "f", male = "m"), #' label = "Sex of the respondent" #' ) #' ) #' look_for(d) #' d %>% #' look_for() %>% #' lookfor_to_long_format() %>% #' convert_list_columns_to_character() #' @source Inspired by the `lookfor` command in Stata. #' @export look_for <- function(data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE, details = c("basic", "none", "full")) { if (inherits(data, c("survey.design", "svyrep.design"))) { data <- data$variables } if (is.logical(details)) { details <- ifelse(details, "full", "none") } else { details <- match.arg(details) } # applying to_labelled data <- to_labelled(data) # search scope n <- names(data) if (!length(n)) stop("there are no names to search in that object") # search function keywords <- c(...) l <- unlist(var_label(data)) if (!is.null(keywords)) { look <- function(x) { grep(paste(keywords, collapse = "|"), x, ignore.case = ignore.case) } # names search x <- look(n) variable <- n[x] # variable labels if (length(l) > 0 && labels) { # search labels y <- look(l) variable <- unique(c(variable, names(l[y]))) } if (values) { # search factor levels fl <- lapply(data, levels) y <- look(fl) variable <- unique(c(variable, names(fl[y]))) # search value levels vl <- lapply(data, val_labels) y <- look(vl) variable <- unique(c(variable, names(vl[y]))) } } else { variable <- n } # output if (length(variable)) { pos <- which(n %in% variable) # reordering according to pos # not forgetting that some variables don't have a label if (length(l)) { res <- dplyr::tibble(pos = pos, variable = n[pos], label = l[n[pos]]) } else { res <- dplyr::tibble(pos = pos, variable = n[pos], label = NA_character_) } if (details != "none") { data <- data %>% dplyr::select(res$variable) n_missing <- function(x) { sum(is.na(x)) } res <- res %>% dplyr::mutate( col_type = unlist(lapply(data, vctrs::vec_ptype_abbr)), missing = unlist(lapply(data, n_missing)), levels = lapply(data, levels), value_labels = lapply(data, val_labels), ) } if (details == "full") { data <- data %>% dplyr::select(res$variable) unique_values <- function(x) { length(unique(x)) } generic_range <- function(x) { if (all(unlist(lapply(x, is.null)))) return(NULL) if (all(is.na(x))) return(NULL) r <- suppressWarnings(try(range(x, na.rm = TRUE), silent = TRUE)) if (inherits(r, "try-error")) return(NULL) r } res <- res %>% dplyr::mutate( class = lapply(data, class), type = unlist(lapply(data, typeof)), na_values = lapply(data, na_values), na_range = lapply(data, na_range), n_na = missing, # retrocompatibility unique_values = unlist(lapply(data, unique_values)), range = lapply(data, generic_range) ) } } else { res <- dplyr::tibble() } # add a look_for class class(res) <- c("look_for", class(res)) res } #' @rdname look_for #' @export lookfor <- look_for #' @rdname look_for #' @export generate_dictionary <- look_for #' @rdname look_for #' @export print.look_for <- function(x, ...) { if (nrow(x) > 0 && all(c("pos", "variable", "label") %in% names(x))) { x <- x %>% lookfor_to_long_format() %>% convert_list_columns_to_character() %>% dplyr::mutate( # display -- when empty label = dplyr::if_else(is.na(.data$label), "\u2014", .data$label) ) if (all(c("value_labels", "levels", "col_type") %in% names(x))) { if (!"range" %in% names(x)) { x$range <- NA_character_ } x <- x %>% dplyr::mutate( values = dplyr::case_when( !is.na(.data$value_labels) ~ .data$value_labels, !is.na(.data$levels) ~ .data$levels, !is.na(.data$range) ~ paste("range:", .data$range), TRUE ~ "" # zero-width space ), variable = dplyr::if_else( duplicated(.data$pos), "", .data$variable ), label = dplyr::if_else(duplicated(.data$pos), "", .data$label), col_type = dplyr::if_else(duplicated(.data$pos), "", .data$col_type), ) if ("missing" %in% names(x)) x <- x %>% dplyr::mutate( missing = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$missing) ) ) if ("unique_values" %in% names(x)) x <- x %>% dplyr::mutate( unique_values = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$unique_values) ) ) if ("na_values" %in% names(x)) x <- x %>% dplyr::mutate( na_values = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$na_values) ) ) if ("na_range" %in% names(x)) x <- x %>% dplyr::mutate( na_range = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$na_range) ) ) x <- x %>% dplyr::mutate( pos = dplyr::if_else( duplicated(.data$pos), "", as.character(.data$pos) ) ) %>% dplyr::select( dplyr::any_of(c("pos", "variable", "label", "col_type", "missing", "unique_values", "values", "na_values", "na_range")) ) } w <- getOption("width") # available width for printing w_pos <- max(3, stringr::str_length(x$pos)) w_variable <- max(5, stringr::str_length(x$variable)) w_label <- max(5, stringr::str_length(x$label)) # nolint if ("values" %in% names(x)) { w_col_type <- max(8, stringr::str_length(x$col_type)) w_values <- max(5, stringr::str_length(x$values)) # nolint # width for labels lw <- w - 8 - w_pos - w_variable - w_col_type lw <- dplyr::case_when( w_values < lw / 2 ~ lw - w_values, w_label < lw / 2 ~ lw - w_label, TRUE ~ trunc(lw / 2) ) x$label <- stringr::str_trunc(x$label, lw, ellipsis = "~") x$values <- stringr::str_trunc(x$values, lw, ellipsis = "~") } else { # width for labels lw <- w - 4 - w_pos - w_variable x$label <- stringr::str_trunc(x$label, lw, ellipsis = "~") } print.data.frame(x, row.names = FALSE, quote = FALSE, right = FALSE) } else if (nrow(x) == 0) { message("Nothing found. Sorry.") } else { print(dplyr::as_tibble(x)) } } #' @rdname look_for #' @export look_for_and_select <- function( data, ..., labels = TRUE, values = TRUE, ignore.case = TRUE ) { lf <- data %>% look_for( ..., labels = labels, values = values, ignore.case = ignore.case, details = "none" ) data %>% dplyr::select(lf$pos) } #' @rdname look_for #' @export convert_list_columns_to_character <- function(x) { if ("range" %in% names(x)) x <- x %>% dplyr::mutate(range = unlist(lapply(range, paste, collapse = " - "))) if ("value_labels" %in% names(x) && is.list(x$value_labels)) x <- x %>% dplyr::mutate(value_labels = names_prefixed_by_values(.data$value_labels)) x %>% dplyr::as_tibble() %>% # remove look_for class dplyr::mutate( dplyr::across( where(is.list), ~ unlist(lapply(.x, paste, collapse = "; ")) ) ) } #' @rdname look_for #' @export lookfor_to_long_format <- function(x) { # only if details are provided if (!"levels" %in% names(x) || !"value_labels" %in% names(x)) return(x) x <- x %>% dplyr::as_tibble() %>% # remove look_for class dplyr::mutate(value_labels = names_prefixed_by_values(.data$value_labels)) # tidyr::unnest() fails if all elements are NULL if (all(unlist(lapply(x$levels, is.null)))) x$levels <- NA_character_ if (all(unlist(lapply(x$value_labels, is.null)))) x$value_labels <- NA_character_ x %>% tidyr::unnest("levels", keep_empty = TRUE) %>% tidyr::unnest("value_labels", keep_empty = TRUE) } labelled/R/recode_if.R0000644000176200001440000000351314357761455014304 0ustar liggesusers#' Recode some values based on condition #' #' @param x vector to be recoded #' @param condition logical vector of same length as `x` #' @param true values to use for `TRUE` values of `condition`. It must be #' either the same length as `x`, or length 1. #' @return Returns `x` with values replaced by `true` when `condition` is #' `TRUE` and unchanged when `condition` is `FALSE` or `NA`. Variable and value #' labels are preserved unchanged. #' @export #' @examples #' v <- labelled(c(1,2,2,9), c(yes = 1, no = 2)) #' v %>% recode_if(v == 9, NA) #' if (require(dplyr)) { #' df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 2, 1)) %>% #' set_value_labels( #' s1 = c(Male = "M", Female = "F"), #' s2 = c(A = 1, B = 2) #' ) %>% #' set_variable_labels(s1 = "Gender", s2 = "Group") #' #' df <- df %>% #' mutate( #' s3 = s2 %>% recode_if(s1 == "F", 2), #' s4 = s2 %>% recode_if(s1 == "M", s2 + 10) #' ) #' df #' df %>% look_for() #' } recode_if <- function(x, condition, true) { if (!is.logical(condition)) stop("'condition' should be logical.") if (length(x) != length(condition)) stop("'condition' and 'x' should have the same length.") if (length(true) > 1 && length(true) != length(x)) stop("'true' should be unique or of same length as 'x'.") original_class <- class(x) condition[is.na(condition)] <- FALSE if (length(true) == 1) { x[condition] <- true } else { x[condition] <- true[condition] } if (!identical(class(x), original_class)) warning( "Class of 'x' has changed and is now equal to \"", paste(class(x), collapse = ", "), "\".\n", "This is usually the case when class of 'value' is different from `x`\n.", "and forced R to coerce 'x' to the class of 'value'." ) x } labelled/R/tagged_na.R0000644000176200001440000001424414357761455014301 0ustar liggesusers#' @importFrom haven tagged_na #' @export haven::tagged_na #' @importFrom haven na_tag #' @export haven::na_tag #' @importFrom haven is_tagged_na #' @export haven::is_tagged_na #' @importFrom haven format_tagged_na #' @export haven::format_tagged_na #' @importFrom haven print_tagged_na #' @export haven::print_tagged_na #' Unique elements, duplicated, ordering and sorting with tagged NAs #' #' These adaptations of [base::unique()], [base::duplicated()], #' [base::order()] and [base::sort()] treats tagged NAs as distinct #' values. #' #' @param x a vector #' @param fromLast logical indicating if duplication should be #' considered from the last #' @export #' @examples #' x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) #' x %>% print_tagged_na() #' #' unique(x) %>% print_tagged_na() #' unique_tagged_na(x) %>% print_tagged_na() #' #' duplicated(x) #' duplicated_tagged_na(x) #' #' order(x) #' order_tagged_na(x) #' #' sort(x, na.last = TRUE) %>% print_tagged_na() #' sort_tagged_na(x) %>% print_tagged_na() unique_tagged_na <- function(x, fromLast = FALSE) { x[!duplicated_tagged_na(x, fromLast = fromLast)] } #' @export #' @rdname unique_tagged_na duplicated_tagged_na <- function(x, fromLast = FALSE) { if (!is.double(x)) return(duplicated(x, fromLast = fromLast)) res <- duplicated(x, fromLast = fromLast, incomparables = NA) if (anyNA(x)) res[is.na(x)] <- duplicated( format_tagged_na(x[is.na(x)]), fromLast = fromLast ) res } #' @rdname unique_tagged_na #' @param na.last if `TRUE`, missing values in the data are put last; #' if `FALSE`, they are put first #' @param decreasing should the sort order be increasing or decreasing? #' @param method the method to be used, see [base::order()] #' @param na_decreasing should the sort order for tagged NAs value be #' @param untagged_na_last should untagged `NA`s be sorted after tagged `NA`s? #' increasing or decreasing? #' @export order_tagged_na <- function(x, na.last = TRUE, decreasing = FALSE, method = c("auto", "shell", "radix"), na_decreasing = decreasing, untagged_na_last = TRUE) { if (!is.double(x)) return( order(x, na.last = na.last, decreasing = decreasing, method = method) ) res <- order( x, na.last = TRUE, decreasing = decreasing, method = method ) if (anyNA(x)) { n_na <- sum(is.na(x)) if (n_na < length(x)) { res <- res[1:(length(x) - n_na)] } else { res <- NULL } t_na <- format_tagged_na(x) t_na[!is.na(x)] <- NA if (xor(untagged_na_last, na_decreasing)) t_na[is.na(x) & !is_tagged_na(x)] <- "ZZZ" na_order <- order( t_na, na.last = TRUE, decreasing = na_decreasing, method = method ) na_order <- na_order[1:n_na] if (na.last) res <- c(res, na_order) else res <- c(na_order, res) } res } #' @rdname unique_tagged_na #' @export sort_tagged_na <- function(x, decreasing = FALSE, na.last = TRUE, na_decreasing = decreasing, untagged_na_last = TRUE) { x[order_tagged_na( x, decreasing = decreasing, na.last = na.last, na_decreasing = na_decreasing, untagged_na_last = untagged_na_last )] } #' Convert tagged NAs into user NAs #' #' [tagged_na_to_user_na()] is the opposite of [user_na_to_tagged_na()] and #' convert tagged `NA` into user defined missing values (see [labelled_spss()]). #' #' [tagged_na_to_regular_na()] converts tagged NAs into regular NAs. #' #' @param x a vector or a data frame #' @param user_na_start minimum value of the new user na, if `NULL`, #' computed automatically (maximum of observed values + 1) #' @export #' @examples #' x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) #' x #' print_tagged_na(x) #' tagged_na_to_user_na(x) #' tagged_na_to_user_na(x, user_na_start = 10) #' #' y <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d")) #' val_labels(y) <- c( #' no = 0, yes = 1, #' "don't know" = tagged_na("d"), #' refusal = tagged_na("r") #' ) #' y #' tagged_na_to_user_na(y, user_na_start = 8) #' tagged_na_to_regular_na(y) #' tagged_na_to_regular_na(y) %>% is_tagged_na() tagged_na_to_user_na <- function(x, user_na_start = NULL) { UseMethod("tagged_na_to_user_na") } #' @export tagged_na_to_user_na.default <- function(x, user_na_start = NULL) { # do nothing x } #' @export tagged_na_to_user_na.double <- function(x, user_na_start = NULL) { if (is.null(user_na_start)) user_na_start <- trunc(max(x, na.rm = TRUE)) + 1 tn <- x[is_tagged_na(x)] %>% unique_tagged_na() %>% sort_tagged_na() if (length(tn) == 0) return(x) labels <- val_labels(x) for (i in seq_along(tn)) { new_val <- user_na_start + i - 1 if (any(x == new_val, na.rm = TRUE)) stop( "Value ", new_val, " is already used in 'x'. Please change 'user_na_start'." ) x[is_tagged_na(x, na_tag(tn[i]))] <- new_val if (any(is_tagged_na(labels, na_tag(tn[i])), na.rm = TRUE)) { labels[is_tagged_na(labels, na_tag(tn[i]))] <- new_val } else { names(new_val) <- format_tagged_na(tn[i]) labels <- c(labels, new_val) } } if (length(labels) > 0) val_labels(x) <- labels na_range(x) <- c(user_na_start, user_na_start + length(tn) - 1) x } #' @export tagged_na_to_user_na.data.frame <- function(x, user_na_start = NULL) { x[] <- lapply(x, tagged_na_to_user_na, user_na_start = user_na_start) x } #' @rdname tagged_na_to_user_na #' @export tagged_na_to_regular_na <- function(x) { UseMethod("tagged_na_to_regular_na") } #' @export tagged_na_to_regular_na.default <- function(x) { # do nothing x } #' @export tagged_na_to_regular_na.double <- function(x) { x[is_tagged_na(x)] <- NA # removing value labels attached to tagged NAs, if any vl <- val_labels(x) if (any(is_tagged_na(vl))) val_labels(x) <- vl[!is_tagged_na(vl)] x } #' @export tagged_na_to_regular_na.data.frame <- function(x) { x[] <- lapply(x, tagged_na_to_regular_na) x } labelled/R/labelled-package.R0000644000176200001440000000041514357761455015520 0ustar liggesusers## usethis namespace: start #' @importFrom lifecycle deprecate_soft #' @importFrom dplyr .data ## usethis namespace: end NULL # because `where` is not exported by tidyselect # cf. https://github.com/r-lib/tidyselect/issues/201 utils::globalVariables("where") labelled/R/to_na.R0000644000176200001440000000257414357761455013473 0ustar liggesusers#' Recode values with no label to NA #' #' For labelled variables, values with no label will be recoded to `NA`. #' #' @param x Object to recode. #' @examples #' v <- labelled(c(1, 2, 9, 1, 9), c(yes = 1, no = 2)) #' nolabel_to_na(v) #' @export nolabel_to_na <- function(x) { UseMethod("nolabel_to_na") } #' @export nolabel_to_na.default <- function(x) { x } #' @export nolabel_to_na.haven_labelled <- function(x) { allval <- unique(x) allval <- allval[!is.na(allval)] nolabel <- allval[!allval %in% val_labels(x)] if (length(nolabel) > 0) x[x %in% nolabel] <- NA x } #' @export nolabel_to_na.data.frame <- function(x) { x[] <- lapply(x, nolabel_to_na) x } #' Recode value labels to NA #' #' For labelled variables, values with a label will be recoded to `NA`. #' #' @param x Object to recode. #' @seealso [haven::zap_labels()] #' @examples #' v <- labelled(c(1, 2, 9, 1, 9), c(dk = 9)) #' val_labels_to_na(v) #' @export val_labels_to_na <- function(x) { UseMethod("val_labels_to_na") } #' @export val_labels_to_na.default <- function(x) { x } #' @export val_labels_to_na.haven_labelled <- function(x) { val <- val_labels(x) if (length(val) > 0) x[x %in% val] <- NA val_labels(x) <- NULL x } #' @export val_labels_to_na.data.frame <- function(x) { x[] <- lapply(x, val_labels_to_na) x } labelled/R/remove_labels.R0000644000176200001440000001433514357761455015210 0ustar liggesusers#' Remove variable label, value labels and user defined missing values #' #' Use `remove_var_label()` to remove variable label, `remove_val_labels()` #' to remove value labels, `remove_user_na()` to remove user defined missing #' values (*na_values* and *na_range*) and `remove_labels()` to remove all. #' #' @param x A vector or a data frame. #' @param user_na_to_na Convert user defined missing values into `NA`? #' @param user_na_to_tagged_na Convert user defined missing values into #' tagged `NA`? It could be applied only to numeric vectors. Note that integer #' labelled vectors will be converted to double labelled vectors. #' @param keep_var_label Keep variable label? #' @details #' Be careful with `remove_user_na()` and `remove_labels()`, user defined #' missing values will not be automatically converted to `NA`, except if you #' specify `user_na_to_na = TRUE`. #' `user_na_to_na(x)` is an equivalent of #' `remove_user_na(x, user_na_to_na = TRUE)`. #' #' If you prefer to convert variables with value labels into factors, use #' [to_factor()] or use [unlabelled()]. #' @examples #' x <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) #' var_label(x) <- "A variable" #' x #' #' remove_labels(x) #' remove_labels(x, user_na_to_na = TRUE) #' remove_user_na(x, user_na_to_na = TRUE) #' remove_user_na(x, user_na_to_tagged_na = TRUE) #' @export remove_labels <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE ) { UseMethod("remove_labels") } #' @export remove_labels.default <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE ) { if (!keep_var_label) var_label(x) <- NULL val_labels(x) <- NULL attr(x, "format.spss") <- NULL x } #' @export remove_labels.haven_labelled_spss <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE ) { x <- remove_user_na( x, user_na_to_na = user_na_to_na, user_na_to_tagged_na = user_na_to_tagged_na ) if (!keep_var_label) var_label(x) <- NULL val_labels(x) <- NULL attr(x, "format.spss") <- NULL x } #' @export remove_labels.data.frame <- function(x, user_na_to_na = FALSE, keep_var_label = FALSE, user_na_to_tagged_na = FALSE ) { x[] <- lapply( x, remove_labels, user_na_to_na = user_na_to_na, keep_var_label = keep_var_label, user_na_to_tagged_na = user_na_to_tagged_na ) x } #' @rdname remove_labels #' @export remove_var_label <- function(x) { UseMethod("remove_var_label") } #' @export remove_var_label.default <- function(x) { var_label(x) <- NULL x } #' @export remove_var_label.data.frame <- function(x) { x[] <- lapply(x, remove_var_label) x } #' @rdname remove_labels #' @export remove_val_labels <- function(x) { UseMethod("remove_val_labels") } #' @export remove_val_labels.default <- function(x) { val_labels(x) <- NULL x } #' @export remove_val_labels.data.frame <- function(x) { x[] <- lapply(x, remove_val_labels) x } #' @rdname remove_labels #' @export remove_user_na <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE ) { UseMethod("remove_user_na") } #' @export remove_user_na.default <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE ) { # do nothing x } #' @export remove_user_na.haven_labelled_spss <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE ) { if (user_na_to_tagged_na) { if (typeof(x) == "character") stop( "'user_na_to_tagged_na' cannot be used with character labelled vectors." ) val_to_tag <- x[is.na(x) & !is.na(unclass(x))] %>% unclass() %>% unique() %>% sort() if (length(val_to_tag) > 26) { warning( length(val_to_tag), " different user-defined missing values found in 'x'.\n", "A maximum of 26 could be tagged.\n", "'user_na_to_tagged_na' has been ignored.\n", "'user_na_to_na = TRUE' has been used instead." ) user_na_to_na <- TRUE } else { if (is.integer(x)) { x <- as.double(unclass(x)) %>% copy_labels_from(x) message("'x' has been converted into a double vector.") val_to_tag <- as.double(val_to_tag) } user_na_to_na <- FALSE vl <- val_labels(x) x <- remove_user_na(x) # to avoid error when combining labelled_spss for (i in seq_along(val_to_tag)) { x[x == val_to_tag[i]] <- tagged_na(letters[i]) if (val_to_tag[i] %in% vl) { vl[vl == val_to_tag[i]] <- tagged_na(letters[i]) } } val_labels(x) <- vl } } if (user_na_to_na) { # removing value labels attached to user_na for ( val in val_labels(x)[test_if_user_na(val_labels(x), na_values(x), na_range(x))] ) val_label(x, val) <- NULL x[is.na(x)] <- NA } na_values(x) <- NULL na_range(x) <- NULL x } #' @export remove_user_na.data.frame <- function(x, user_na_to_na = FALSE, user_na_to_tagged_na = FALSE ) { x[] <- lapply( x, remove_user_na, user_na_to_na = user_na_to_na, user_na_to_tagged_na = user_na_to_tagged_na ) x } labelled/R/val_labels.R0000644000176200001440000003264614444527456014500 0ustar liggesusers#' Get / Set value labels #' #' @param x A vector or a data.frame #' @param prefixed Should labels be prefixed with values? #' @param v A single value. #' @param value A named vector for `val_labels()` (see [haven::labelled()]) or #' a character string for `val_label()`. `NULL` to remove the labels. #' For data frames, it could also be a named list with a vector of value #' labels per variable. #' @return #' `val_labels()` will return a named vector. #' `val_label()` will return a single character string. #' @examples #' v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) #' val_labels(v) #' val_labels(v, prefixed = TRUE) #' val_label(v, 2) #' val_label(v, 2) <- 'maybe' #' val_label(v, 9) <- NULL #' val_labels(v) <- NULL #' @export val_labels <- function(x, prefixed = FALSE) { UseMethod("val_labels") } #' @export val_labels.default <- function(x, prefixed = FALSE) { # return nothing NULL } #' @export val_labels.haven_labelled <- function(x, prefixed = FALSE) { labels <- attr(x, "labels", exact = TRUE) if (prefixed) names(labels) <- names_prefixed_by_values(labels) labels } #' @export val_labels.data.frame <- function(x, prefixed = FALSE) { lapply(x, val_labels, prefixed = prefixed) } #' @rdname val_labels #' @export `val_labels<-` <- function(x, value) { UseMethod("val_labels<-") } #' @export `val_labels<-.default` <- function(x, value) { # do nothing x } #' @export `val_labels<-.factor` <- function(x, value) { if (!is.null(value)) stop("Value labels cannot be applied to factors.") x %>% remove_attributes("labels") } #' @export `val_labels<-.numeric` <- function(x, value) { if (!is.null(value) && length(value) > 0) { x <- labelled(x, value, label = var_label(x)) } x } #' @export `val_labels<-.character` <- function(x, value) { if (!is.null(value) && length(value) > 0) { x <- labelled(x, value, label = var_label(x)) } x } #' @export `val_labels<-.haven_labelled` <- function(x, value) { if (length(value) == 0) value <- NULL if (is.null(value)) { x <- unclass(x) attr(x, "labels") <- NULL } else { x <- labelled(x, value, label = var_label(x)) } x } #' @export `val_labels<-.haven_labelled_spss` <- function(x, value) { if (length(value) == 0) value <- NULL if (is.null(value) && is.null(attr(x, "na_values")) && is.null(attr(x, "na_range"))) { x <- unclass(x) attr(x, "labels") <- NULL } else { x <- labelled_spss( x, value, na_values = attr(x, "na_values"), na_range = attr(x, "na_range"), label = var_label(x) ) } x } #' @export `val_labels<-.data.frame` <- function(x, value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } if (!all(names(value) %in% names(x))) { missing_names <- stringr::str_c( setdiff(names(value), names(x)), collapse = ", " ) stop("some variables not found in x:", missing_names) } for (var in names(value)) if (!is.null(value[[var]])) { if (mode(x[[var]]) != mode(value[[var]])) stop("`x` and `value` must be same type", call. = FALSE, domain = "R-labelled") if (typeof(x[[var]]) != typeof(value[[var]])) mode(value[[var]]) <- typeof(x[[var]]) } for (var in names(value)) val_labels(x[[var]]) <- value[[var]] x } #' @rdname val_labels #' @export val_label <- function(x, v, prefixed = FALSE) { UseMethod("val_label") } #' @export val_label.default <- function(x, v, prefixed = FALSE) { if (length(v) != 1) stop("`v` should be a single value", call. = FALSE, domain = "R-labelled") # return nothing NULL } #' @export val_label.haven_labelled <- function(x, v, prefixed = FALSE) { if (length(v) != 1) stop("`v` should be a single value", call. = FALSE, domain = "R-labelled") labels <- val_labels(x, prefixed = prefixed) if (v %in% labels) { names(labels)[labels == v] } else { NULL } } #' @export val_label.data.frame <- function(x, v, prefixed = FALSE) { lapply(x, val_label, v = v, prefixed = prefixed) } #' @rdname val_labels #' @export `val_label<-` <- function(x, v, value) { UseMethod("val_label<-") } #' @export `val_label<-.default` <- function(x, v, value) { # do nothing x } #' @export `val_label<-.factor` <- function(x, v, value) { if (!is.null(value)) stop("Value labels cannot be applied to factors.") x } #' @export `val_label<-.haven_labelled` <- function(x, v, value) { if (length(v) != 1) stop("`v` should be a single value", call. = FALSE, domain = "R-labelled") if (length(value) > 1) stop("`value` should be a single character string or NULL", call. = FALSE, domain = "R-labelled") labels <- val_labels(x) if (is.null(value)) { if (v %in% labels) labels <- labels[labels != v] } else { if (v %in% labels) { names(labels)[labels == v] <- value } else { names(v) <- value labels <- c(labels, v) } } if (length(labels) == 0) labels <- NULL val_labels(x) <- labels x } #' @export `val_label<-.numeric` <- function(x, v, value) { `val_label<-.haven_labelled`(x = x, v = v, value = value) } #' @export `val_label<-.character` <- function(x, v, value) { `val_label<-.haven_labelled`(x = x, v = v, value = value) } #' @export `val_label<-.data.frame` <- function(x, v, value) { if (!is.list(value)) { temp <- as.list(rep(1, ncol(x))) names(temp) <- names(x) value <- lapply(temp, function(x) { x <- value }) } value <- value[names(value) %in% names(x)] for (var in names(value)[]) { if (!is.character(value[[var]]) && !is.null(value[[var]])) stop("`value` should contain only characters or NULL", call. = FALSE, domain = "R-labelled") if (length(value[[var]]) > 1) stop("`value` should contain only one string (or NULL) per variable", call. = FALSE, domain = "R-labelled") } for (var in names(value)) val_label(x[[var]], v) <- value[[var]] x } #' @rdname val_labels #' @export get_value_labels <- val_labels #' @rdname val_labels #' @param .data a data frame or a vector #' @param ... name-value pairs of value labels (see examples) #' @param .labels value labels to be applied to the data.frame, #' using the same syntax as `value` in `val_labels(df) <- value`. #' @param .strict should an error be returned if some labels #' doesn't correspond to a column of `x`? #' @note #' `get_value_labels()` is identical to `val_labels()`. #' #' `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` #' could be used with \pkg{dplyr} syntax. #' While `set_value_labels()` will replace the list of value labels, #' `add_value_labels()` and `remove_value_labels()` will update that list #' (see examples). #' #' `set_value_labels()` could also be applied to a vector / a data.frame column. #' In such case, you can provide a vector of value labels using `.labels` or #' several name-value pairs of value labels (see example). #' Similarly, `add_value_labels()` and `remove_value_labels()` could also be #' applied on vectors. #' @return #' `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` will #' return an updated copy of `.data`. #' @examples #' if (require(dplyr)) { #' # setting value labels #' df <- tibble(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% #' set_value_labels( #' s1 = c(Male = "M", Female = "F"), #' s2 = c(Yes = 1, No = 2) #' ) #' val_labels(df) #' #' # updating value labels #' df <- df %>% add_value_labels(s2 = c(Unknown = 9)) #' df$s2 #' #' # removing a value labels #' df <- df %>% remove_value_labels(s2 = 9) #' df$s2 #' #' # removing all value labels #' df <- df %>% set_value_labels(s2 = NULL) #' df$s2 #' #' # example on a vector #' v <- 1:4 #' v <- set_value_labels(v, min = 1, max = 4) #' v #' v %>% set_value_labels(middle = 3) #' v %>% set_value_labels(NULL) #' v %>% set_value_labels(.labels = c(a = 1, b = 2, c= 3, d = 4)) #' v %>% add_value_labels(between = 2) #' v %>% remove_value_labels(4) #' } #' @export set_value_labels <- function(.data, ..., .labels = NA, .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) stop(".data should be a data.frame or a vector") # vector case if (is.atomic(.data)) { if (!identical(.labels, NA)) { val_labels(.data) <- .labels } else { val_labels(.data) <- values <- unlist(rlang::dots_list(...)) } return(.data) } # data.frame case if (!identical(.labels, NA)) { if (!.strict) .labels <- .labels[intersect(names(.labels), names(.data))] val_labels(.data) <- .labels } values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- stringr::str_c( setdiff(names(values), names(.data)), collapse = ", " ) stop("some variables not found in .data: ", missing_names) } for (v in intersect(names(values), names(.data))) val_labels(.data[[v]]) <- values[[v]] .data } #' @rdname val_labels #' @export add_value_labels <- function(.data, ..., .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) stop(".data should be a data.frame or a vector") # vector case if (is.atomic(.data)) { values <- unlist(rlang::dots_list(...)) if (is.null(names(values)) || any(names(values) == "")) stop("all arguments should be named") for (v in names(values)) val_label(.data, values[[v]]) <- v return(.data) } # data.frame case values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- stringr::str_c( setdiff(names(values), names(.data)), collapse = ", " ) stop("some variables not found in .data: ", missing_names) } for (v in values) if (is.null(names(v)) || any(names(v) == "")) stop("all arguments should be named vectors") for (v in intersect(names(values), names(.data))) for (l in names(values[[v]])) val_label(.data[[v]], values[[v]][[l]]) <- l .data } #' @rdname val_labels #' @export remove_value_labels <- function(.data, ..., .strict = TRUE) { if (!is.data.frame(.data) && !is.atomic(.data)) stop(".data should be a data.frame or a vector") # vector case if (is.atomic(.data)) { values <- unlist(rlang::dots_list(...)) for (v in values) val_label(.data, v) <- NULL return(.data) } # data.frame case values <- rlang::dots_list(...) if (.strict && !all(names(values) %in% names(.data))) { missing_names <- stringr::str_c( setdiff(names(values), names(.data)), collapse = ", " ) stop("some variables not found in .data: ", missing_names) } for (v in intersect(names(values), names(.data))) for (l in values[[v]]) val_label(.data[[v]], l) <- NULL .data } #' Sort value labels #' #' Sort value labels according to values or to labels #' #' @param x A labelled vector or a data.frame #' @param according_to According to values or to labels? #' @param decreasing In decreasing order? #' @examples #' v <- labelled(c(1, 2, 3), c(maybe = 2, yes = 1, no = 3)) #' v #' sort_val_labels(v) #' sort_val_labels(v, decreasing = TRUE) #' sort_val_labels(v, 'l') #' sort_val_labels(v, 'l', TRUE) #' @export sort_val_labels <- function(x, according_to = c("values", "labels"), decreasing = FALSE) { UseMethod("sort_val_labels") } #' @export sort_val_labels.default <- function(x, according_to = c("values", "labels"), decreasing = FALSE) { # do nothing x } #' @export sort_val_labels.haven_labelled <- function(x, according_to = c("values", "labels"), decreasing = FALSE) { according_to <- match.arg(according_to) labels <- val_labels(x) if (!is.null(labels)) { if (according_to == "values") labels <- sort_tagged_na(labels, decreasing = decreasing) if (according_to == "labels") labels <- labels[order(names(labels), decreasing = decreasing)] val_labels(x) <- labels } x } #' @export sort_val_labels.data.frame <- function(x, according_to = c("values", "labels"), decreasing = FALSE) { x[] <- lapply(x, sort_val_labels, according_to = according_to, decreasing = decreasing) x } #' Turn a named vector into a vector of names prefixed by values #' @param x vector to be prefixed #' @examples #' df <- dplyr::tibble( #' c1 = labelled(c("M", "M", "F"), c(Male = "M", Female = "F")), #' c2 = labelled(c(1, 1, 2), c(Yes = 1, No = 2)) #' ) #' val_labels(df$c1) #' val_labels(df$c1) %>% names_prefixed_by_values() #' val_labels(df) #' val_labels(df) %>% names_prefixed_by_values() #' @export names_prefixed_by_values <- function(x) { UseMethod("names_prefixed_by_values") } #' @export names_prefixed_by_values.default <- function(x) { if (is.null(x)) return(NULL) res <- as.character(x) if (is.double(x)) { res[is_tagged_na(x)] <- format_tagged_na(x[is_tagged_na(x)]) } res <- paste0("[", res, "] ", names(x)) names(res) <- names(x) res } #' @export names_prefixed_by_values.list <- function(x) { lapply(x, names_prefixed_by_values) } labelled/R/labelled.R0000644000176200001440000000050614357761455014130 0ustar liggesusers#' @importFrom haven labelled #' @export haven::labelled #' @importFrom haven is.labelled #' @export haven::is.labelled #' @importFrom haven labelled_spss #' @export haven::labelled_spss #' @importFrom haven print_labels #' @export haven::print_labels #' @importFrom dplyr `%>%` #' @export dplyr::`%>%` labelled/R/data.R0000644000176200001440000000052014357761455013271 0ustar liggesusers#' Datasets for testing #' #' These datasets are used to test compatibility with foreign (spss_foreign), #' or haven_2.0 (x_haven_2.0, x_spss_haven_2.0) packages #' @rdname test_datasets "x_haven_2.0" #' @rdname test_datasets "x_spss_haven_2.0" #' @rdname test_datasets "spss_file" #' @rdname test_datasets "dta_file" labelled/R/to_factor.R0000644000176200001440000002052214411236022014316 0ustar liggesusers#' Convert input to a factor. #' #' The base function [base::as.factor()] is not a generic, but this variant #' is. By default, `to_factor()` is a wrapper for [base::as.factor()]. #' Please note that `to_factor()` differs slightly from [haven::as_factor()] #' method provided by \pkg{haven} package. #' #' @param x Object to coerce to a factor. #' @param ... Other arguments passed down to method. #' @export to_factor <- function(x, ...) { UseMethod("to_factor") } #' @export to_factor.factor <- function(x, ...) { x } #' @export to_factor.default <- function(x, ...) { vl <- var_label(x) x <- as.factor(x) var_label(x) <- vl x } #' @rdname to_factor #' @param levels What should be used for the factor levels: the labels, the #' values or labels prefixed with values? #' @param ordered `TRUE` for ordinal factors, `FALSE` (default) for nominal #' factors. #' @param nolabel_to_na Should values with no label be converted to `NA`? #' @param sort_levels How the factor levels should be sorted? (see Details) #' @param decreasing Should levels be sorted in decreasing order? #' @param drop_unused_labels Should unused value labels be dropped? #' (applied only if `strict = FALSE`) #' @param user_na_to_na Convert user defined missing values into `NA`? #' @param strict Convert to factor only if all values have a defined label? #' @param unclass If not converted to a factor (when `strict = TRUE`), #' convert to a character or a numeric factor by applying [base::unclass()]? #' @param explicit_tagged_na Should tagged NA (cf. [haven::tagged_na()]) be #' kept as explicit factor levels? #' @details #' If some values doesn't have a label, automatic labels will be created, #' except if `nolabel_to_na` is `TRUE`. #' #' If `sort_levels == 'values'`, the levels will be sorted according to the #' values of `x`. #' If `sort_levels == 'labels'`, the levels will be sorted according to #' labels' names. #' If `sort_levels == 'none'`, the levels will be in the order the value #' labels are defined in `x`. If some labels are automatically created, they #' will be added at the end. #' If `sort_levels == 'auto'`, `sort_levels == 'none'` will be used, except #' if some values doesn't have a defined label. In such case, #' `sort_levels == 'values'` will be applied. #' @examples #' v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) #' to_factor(v) #' to_factor(v, nolabel_to_na = TRUE) #' to_factor(v, 'p') #' to_factor(v, sort_levels = 'v') #' to_factor(v, sort_levels = 'n') #' to_factor(v, sort_levels = 'l') #' #' x <- labelled(c('H', 'M', 'H', 'L'), c(low = 'L', medium = 'M', high = 'H')) #' to_factor(x, ordered = TRUE) #' #' # Strict conversion #' v <- labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)) #' to_factor(v) #' to_factor(v, strict = TRUE) # Not converted because 3 does not have a label #' to_factor(v, strict = TRUE, unclass = TRUE) #' @export to_factor.haven_labelled <- function(x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, drop_unused_labels = FALSE, user_na_to_na = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ...) { vl <- var_label(x) levels <- match.arg(levels) sort_levels <- match.arg(sort_levels) if (user_na_to_na) x <- user_na_to_na(x) if (explicit_tagged_na && is.double(x)) { new_labels <- to_character(val_labels(x), explicit_tagged_na = TRUE) x <- to_character(unclass(x), explicit_tagged_na = TRUE) if (any(is.na(new_labels))) { # regular NA with a label x[is.na(x)] <- "NA" new_labels[is.na(new_labels)] <- "NA" } val_labels(x) <- new_labels } else { l <- val_labels(x) val_labels(x) <- l[!is.na(l)] # keeping not NA values } if (strict) { allval <- unique(x) allval <- allval[!is.na(allval)] nolabel <- allval[!allval %in% val_labels(x)] if (length(nolabel) > 0) { if (unclass) { x <- unclass(x) } return(x) } } if (nolabel_to_na) x <- nolabel_to_na(x) labels <- val_labels(x) allval <- unique(x) allval <- allval[!is.na(allval)] nolabel <- sort(allval[!allval %in% labels]) # if there are some values with no label if (length(nolabel) > 0) { names(nolabel) <- as.character(nolabel) levs <- c(labels, nolabel) } else { levs <- labels } if (sort_levels == "auto" && length(nolabel) > 0) sort_levels <- "values" if (sort_levels == "labels") levs <- levs[order(names(levs), decreasing = decreasing)] if (sort_levels == "values") levs <- sort(levs, decreasing = decreasing) if (levels == "labels") labs <- names(levs) if (levels == "values") labs <- unname(levs) if (levels == "prefixed") labs <- names_prefixed_by_values(levs) levs <- unname(levs) x <- factor(x, levels = levs, labels = labs, ordered = ordered, ...) if (drop_unused_labels) x <- droplevels(x) var_label(x) <- vl x } #' @rdname to_factor #' @param labelled_only for a data.frame, convert only labelled variables to #' factors? #' @details #' When applied to a data.frame, only labelled vectors are converted by #' default to a factor. Use `labelled_only = FALSE` to convert all variables #' to factors. #' @export to_factor.data.frame <- function( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, labelled_only = TRUE, drop_unused_labels = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ... ) { cl <- class(x) x <- dplyr::as_tibble( lapply( x, .to_factor_col_data_frame, levels = levels, ordered = ordered, nolabel_to_na = nolabel_to_na, sort_levels = sort_levels, decreasing = decreasing, labelled_only = labelled_only, drop_unused_labels = drop_unused_labels, strict = strict, unclass = unclass, explicit_tagged_na = explicit_tagged_na, ... ) ) class(x) <- cl x } .to_factor_col_data_frame <- function( x, levels = c("labels", "values", "prefixed"), ordered = FALSE, nolabel_to_na = FALSE, sort_levels = c("auto", "none", "labels", "values"), decreasing = FALSE, labelled_only = TRUE, drop_unused_labels = FALSE, strict = FALSE, unclass = FALSE, explicit_tagged_na = FALSE, ... ) { if (inherits(x, "haven_labelled")) x <- to_factor(x, levels = levels, ordered = ordered, nolabel_to_na = nolabel_to_na, sort_levels = sort_levels, decreasing = decreasing, drop_unused_labels = drop_unused_labels, strict = strict, unclass = unclass, explicit_tagged_na = explicit_tagged_na, ...) else if (!labelled_only) x <- to_factor(x) x } #' @rdname to_factor #' @description #' `unlabelled(x)` is a shortcut for #' `to_factor(x, strict = TRUE, unclass = TRUE, labelled_only = TRUE)`. #' @details #' `unlabelled()` is a shortcut for quickly removing value labels of a vector #' or of a data.frame. If all observed values have a value label, then the #' vector will be converted into a factor. Otherwise, the vector will be #' unclassed. #' If you want to remove value labels in all cases, use [remove_val_labels()]. #' @examples #' #' df <- data.frame( #' a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), #' b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), #' c = labelled( #' c("a", "a", "b", "c"), #' labels = c(No = "a", Maybe = "b", Yes = "c") #' ), #' d = 1:4, #' e = factor(c("item1", "item2", "item1", "item2")), #' f = c("itemA", "itemA", "itemB", "itemB"), #' stringsAsFactors = FALSE #' ) #' if (require(dplyr)) { #' glimpse(df) #' glimpse(unlabelled(df)) #' } #' @export unlabelled <- function(x, ...) { if (is.data.frame(x)) to_factor(x, strict = TRUE, unclass = TRUE, labelled_only = TRUE, ...) else if (inherits(x, "haven_labelled")) to_factor(x, strict = TRUE, unclass = TRUE, ...) else x } labelled/R/to_labelled.R0000644000176200001440000002147214411254215014615 0ustar liggesusers#' Convert to labelled data #' #' Convert a factor or data imported with \pkg{foreign} or \pkg{memisc} to #' labelled data. #' #' @param x Factor or dataset to convert to labelled data frame #' @param ... Not used #' @details #' `to_labelled()` is a general wrapper calling the appropriate sub-functions. #' #' `memisc_to_labelled()` converts a `memisc::data.set()`]` object created with #' \pkg{memisc} package to a labelled data frame. #' #' `foreign_to_labelled()` converts data imported with [foreign::read.spss()] #' or [foreign::read.dta()] from \pkg{foreign} package to a labelled data frame, #' i.e. using [haven::labelled()]. #' Factors will not be converted. Therefore, you should use #' `use.value.labels = FALSE` when importing with [foreign::read.spss()] or #' `convert.factors = FALSE` when importing with [foreign::read.dta()]. #' #' To convert correctly defined missing values imported with #' [foreign::read.spss()], you should have used `to.data.frame = FALSE` and #' `use.missings = FALSE`. If you used the option `to.data.frame = TRUE`, #' meta data describing missing values will not be attached to the import. #' If you used `use.missings = TRUE`, missing values would have been converted #' to `NA`. #' #' So far, missing values defined in **Stata** are always imported as `NA` by #' [foreign::read.dta()] and could not be retrieved by `foreign_to_labelled()`. #' #' @return A tbl data frame or a labelled vector. #' @seealso [haven::labelled()], [foreign::read.spss()], #' [foreign::read.dta()], `memisc::data.set()`, #' `memisc::importer`, [to_factor()]. #' #' @examples #' \dontrun{ #' # from foreign #' library(foreign) #' sav <- system.file("files", "electric.sav", package = "foreign") #' df <- to_labelled(read.spss( #' sav, #' to.data.frame = FALSE, #' use.value.labels = FALSE, #' use.missings = FALSE #' )) #' #' # from memisc #' library(memisc) #' nes1948.por <- UnZip('anes/NES1948.ZIP', 'NES1948.POR', package='memisc') #' nes1948 <- spss.portable.file(nes1948.por) #' ds <- as.data.set(nes1948) #' df <- to_labelled(ds) #' } #' #' @export to_labelled <- function(x, ...) { UseMethod("to_labelled") } #' @rdname to_labelled #' @export to_labelled.data.frame <- function(x, ...) { foreign_to_labelled(x) } #' @rdname to_labelled #' @export to_labelled.list <- function(x, ...) { foreign_to_labelled(x) } #' @rdname to_labelled #' @export to_labelled.data.set <- function(x, ...) { memisc_to_labelled(x) } #' @rdname to_labelled #' @export to_labelled.importer <- function(x, ...) { memisc_to_labelled(memisc::as.data.set(x)) } #' @rdname to_labelled #' @export foreign_to_labelled <- function(x) { # note: attr(* , 'missings') and attr(*, 'variable.labels') # are lost when applying as.data.frame (if # read.spss(to.data.frame = F)) variable.labels <- attr(x, "variable.labels", exact = TRUE) # read.spss var.labels <- attr(x, "var.labels", exact = TRUE) # read.dta label.table <- attr(x, "label.table", exact = TRUE) # read.dta missings <- attr(x, "missings", exact = TRUE) # read.spss # if imported with read.spss(to.data.frame=FALSE) it's a # list, not a df if (!is.data.frame(x)) { if (requireNamespace("dplyr")) { x <- dplyr::as_tibble(x) } else { x <- as.data.frame(x, stringsAsFactors = FALSE) } } # variable labels (read.spss) if (!is.null(variable.labels)) var_label(x) <- as.list(variable.labels) # variable labels (read.dta) if (!is.null(var.labels)) { names(var.labels) <- names(x) var_label(x) <- as.list(var.labels) } # value labels (read.spss) for (var in names(x)) { if (!is.null(attr(x[[var]], "value.labels", exact = TRUE))) val_labels(x[[var]]) <- attr(x[[var]], "value.labels", exact = TRUE) attr(x[[var]], "value.labels") <- NULL } # value labels (read.dta) if (!is.null(label.table)) { # taking into account only variables existing in x val_labels(x) <- label.table[intersect(names(label.table), names(x))] } # missing values (read.spss) for (var in names(missings)) { if (missings[[var]]$type %in% c("one", "two", "three")) { na_values(x[[var]]) <- missings[[var]]$value } if (missings[[var]]$type %in% c("range", "range+1")) { na_range(x[[var]]) <- missings[[var]]$value[1:2] } if (missings[[var]]$type == "range+1") { na_values(x[[var]]) <- missings[[var]]$value[3] } } # cleaning read.spss attr(x, "variable.labels") <- NULL attr(x, "missings") <- NULL # cleaning read.dta attr(x, "datalabel") <- NULL attr(x, "time.stamp") <- NULL attr(x, "formats") <- NULL attr(x, "types") <- NULL attr(x, "val.labels") <- NULL attr(x, "var.labels") <- NULL attr(x, "version") <- NULL attr(x, "label.table") <- NULL attr(x, "missing") <- NULL # to tbl_df (if no other class already specified) if (length(class(x)) == 1) class(x) <- c("tbl_df", "tbl", "data.frame") x } #' @rdname to_labelled #' @export memisc_to_labelled <- function(x) { if (!inherits(x, "data.set")) return(x) if (!requireNamespace("memisc")) stop("memisc package is required to convert a data.set", call. = FALSE, domain = "R-labelled") df <- as.data.frame(x) for (var in names(x)) { if (length(memisc::description(x[[var]])) > 0) var_label(df[[var]]) <- as.character(memisc::description(x[[var]])) if (length(memisc::labels(x[[var]])) > 0) { labs <- memisc::labels(x[[var]])@values names(labs) <- memisc::labels(x[[var]])@.Data val_labels(df[[var]]) <- labs } if (!is.null(memisc::missing.values(x[[var]])) && length(memisc::missing.values(x[[var]])@filter) > 0) na_values(df[[var]]) <- memisc::missing.values(x[[var]])@filter if (!is.null(memisc::missing.values(x[[var]])) && length(memisc::missing.values(x[[var]])@range) > 0) na_range(df[[var]]) <- memisc::missing.values(x[[var]])@range } dplyr::as_tibble(df) } #' @rdname to_labelled #' @param labels When converting a factor only: #' an optional named vector indicating how factor levels should be coded. #' If a factor level is not found in `labels`, it will be converted to `NA`. #' @param .quiet do not display warnings for prefixed factors with duplicated #' codes #' @details #' If you convert a labelled vector into a factor with prefix, i.e. by using #' [to_factor(levels = "prefixed")][to_factor()], `to_labelled.factor()` is able #' to reconvert it to a labelled vector with same values and labels. #' @export #' @examples #' # Converting factors to labelled vectors #' f <- factor( #' c("yes", "yes", "no", "no", "don't know", "no", "yes", "don't know") #' ) #' to_labelled(f) #' to_labelled(f, c("yes" = 1, "no" = 2, "don't know" = 9)) #' to_labelled(f, c("yes" = 1, "no" = 2)) #' to_labelled(f, c("yes" = "Y", "no" = "N", "don't know" = "DK")) #' #' s1 <- labelled(c('M', 'M', 'F'), c(Male = 'M', Female = 'F')) #' labels <- val_labels(s1) #' f1 <- to_factor(s1) #' f1 #' #' to_labelled(f1) #' identical(s1, to_labelled(f1)) #' to_labelled(f1, labels) #' identical(s1, to_labelled(f1, labels)) #' #' l <- labelled( #' c(1, 1, 2, 2, 9, 2, 1, 9), #' c("yes" = 1, "no" = 2, "don't know" = 9) #' ) #' f <- to_factor(l, levels = "p") #' f #' to_labelled(f) #' identical(to_labelled(f), l) to_labelled.factor <- function(x, labels = NULL, .quiet = FALSE, ...) { vl <- var_label(x) if (is.null(labels)) { # check if levels are formatted as "[code] label" l <- .get_prefixes.factor(x) if (any(is.na(l$code)) || any(is.na(l$code)) || any(duplicated(l$code))) { if (!.quiet && any(duplicated(l$code)) && all(!is.na(l$code)) && all(!is.na(l$code))) warning("'x' looks prefixed, but duplicated codes found.") # normal case labs <- seq_along(levels(x)) names(labs) <- levels(x) x <- labelled(as.numeric(x), labs) } else { # "[code] label" case num_l <- suppressWarnings(as.numeric(l$code)) if (!.quiet && all(!is.na(num_l)) && any(duplicated(num_l))) warning("All codes seem numeric but some duplicates found.") if (all(!is.na(num_l)) && !any(duplicated(num_l))) l$code <- as.numeric(l$code) r <- l$levels names(r) <- l$code levels(x) <- l$code x <- as.character(x) if (is.numeric(l$code)) x <- as.numeric(x) names(l$code) <- l$label x <- labelled(x, l$code) } } else { # labels is not NULL r <- rep_len(NA, length(x)) mode(r) <- mode(labels) for (i in seq_along(labels)) r[x == names(labels)[i]] <- labels[i] x <- labelled(r, labels) } var_label(x) <- vl x } labelled/R/remove_attributes.R0000644000176200001440000000173014444114227016111 0ustar liggesusers#' Remove attributes #' #' This function removes specified attributes. When applied to a data.frame, it #' will also remove recursively the specified attributes to each column of the #' data.frame. #' #' @param x an object #' @param attributes a character vector indicating attributes to remove #' @export #' @examples #' \dontrun{ #' library(haven) #' path <- system.file("examples", "iris.sav", package = "haven") #' d <- read_sav(path) #' str(d) #' d <- remove_attributes(d, "format.spss") #' str(d)} remove_attributes <- function(x, attributes) { UseMethod("remove_attributes") } #' @export remove_attributes.default <- function(x, attributes) { for (a in attributes) attr(x, a) <- NULL x } #' @export remove_attributes.data.frame <- function(x, attributes) { cl <- class(x) x <- remove_attributes.default(x, attributes) x <- dplyr::as_tibble( lapply(x, remove_attributes, attributes = attributes) ) class(x) <- cl x } labelled/R/drop_unused_value_labels.R0000644000176200001440000000132014357761455017424 0ustar liggesusers#' Drop unused value labels #' #' Drop value labels associated to a value not present in the data. #' #' @param x A vector or a data frame. #' @examples #' x <- labelled(c(1, 2, 2, 1), c(yes = 1, no = 2, maybe = 3)) #' x #' drop_unused_value_labels(x) #' @export drop_unused_value_labels <- function(x) { UseMethod("drop_unused_value_labels") } #' @export drop_unused_value_labels.default <- function(x) { # do nothing x } #' @export drop_unused_value_labels.haven_labelled <- function(x) { vl <- val_labels(x) val_labels(x) <- vl[vl %in% unique(x)] x } #' @export drop_unused_value_labels.data.frame <- function(x) { x[] <- lapply(x, drop_unused_value_labels) x } labelled/R/to_character.R0000644000176200001440000000777014444041655015022 0ustar liggesusers#' Convert input to a character vector #' #' By default, `to_character()` is a wrapper for [base::as.character()]. #' For labelled vector, to_character allows to specify if value, labels or #' labels prefixed with values should be used for conversion. #' #' @param x Object to coerce to a character vector. #' @param ... Other arguments passed down to method. #' @param explicit_tagged_na should tagged NA be kept? #' @export to_character <- function(x, ...) { UseMethod("to_character") } #' @export to_character.default <- function(x, ...) { vl <- var_label(x) x <- as.character(x) var_label(x) <- vl x } #' @rdname to_character #' @export to_character.double <- function(x, explicit_tagged_na = FALSE, ...) { res <- as.character(x) if (explicit_tagged_na) res[is_tagged_na(x)] <- format_tagged_na(x[is_tagged_na(x)]) var_label(res) <- var_label(x) names(res) <- names(x) res } #' @rdname to_character #' @param levels What should be used for the factor levels: the labels, the #' values or labels prefixed with values? #' @param nolabel_to_na Should values with no label be converted to `NA`? #' @param user_na_to_na user defined missing values into NA? #' @details #' If some values doesn't have a label, automatic labels will be created, #' except if `nolabel_to_na` is `TRUE`. #' @examples #' v <- labelled(c(1,2,2,2,3,9,1,3,2,NA), c(yes = 1, no = 3, "don't know" = 9)) #' to_character(v) #' to_character(v, nolabel_to_na = TRUE) #' to_character(v, "v") #' to_character(v, "p") #' @export to_character.haven_labelled <- function( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, ... ) { vl <- var_label(x) levels <- match.arg(levels) x <- as.character(to_factor( x, levels = levels, nolabel_to_na = nolabel_to_na, user_na_to_na = user_na_to_na, explicit_tagged_na = explicit_tagged_na )) var_label(x) <- vl x } #' @rdname to_character #' @param labelled_only for a data.frame, convert only labelled variables to #' factors? #' @details #' When applied to a data.frame, only labelled vectors are converted by #' default to character. Use `labelled_only = FALSE` to convert all variables #' to characters. #' @export #' @examples #' #' df <- data.frame( #' a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), #' b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), #' c = labelled( #' c("a", "a", "b", "c"), #' labels = c(No = "a", Maybe = "b", Yes = "c") #' ), #' d = 1:4, #' e = factor(c("item1", "item2", "item1", "item2")), #' f = c("itemA", "itemA", "itemB", "itemB"), #' stringsAsFactors = FALSE #' ) #' #' if (require(dplyr)) { #' glimpse(df) #' glimpse(to_character(df)) #' glimpse(to_character(df, labelled_only = FALSE)) #' } to_character.data.frame <- function( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, labelled_only = TRUE, ... ) { cl <- class(x) x <- dplyr::as_tibble( lapply( x, .to_character_col_data_frame, levels = levels, nolabel_to_na = nolabel_to_na, user_na_to_na = user_na_to_na, explicit_tagged_na = explicit_tagged_na, labelled_only = labelled_only, ... ) ) class(x) <- cl x } .to_character_col_data_frame <- function( x, levels = c("labels", "values", "prefixed"), nolabel_to_na = FALSE, user_na_to_na = FALSE, explicit_tagged_na = FALSE, labelled_only = TRUE, ... ) { if (inherits(x, "haven_labelled")) x <- to_character(x, levels = levels, nolabel_to_na = nolabel_to_na, user_na_to_na = user_na_to_na, explicit_tagged_na = explicit_tagged_na, ...) else if (!labelled_only) x <- to_character(x) x } labelled/R/retrocompatibility.R0000644000176200001440000000420314357761455016307 0ustar liggesusers#' Update labelled data to last version #' #' Labelled data imported with \pkg{haven} version 1.1.2 or before or #' created with [haven::labelled()] version 1.1.0 or before was using #' "labelled" and "labelled_spss" classes. #' #' Since version 2.0.0 of these two packages, "haven_labelled" and #' "haven_labelled_spss" are used instead. #' #' Since haven 2.3.0, "haven_labelled" class has been evolving #' using now \pkg{vctrs} package. #' #' `update_labelled()` convert labelled vectors #' from the old to the new classes and to reconstruct all #' labelled vectors with the last version of the package. #' #' @param x An object (vector or data.frame) to convert. #' @seealso [haven::labelled()], [haven::labelled_spss()] #' @export update_labelled <- function(x) { UseMethod("update_labelled") } #' @export update_labelled.default <- function(x) { # return x x } #' @rdname update_labelled #' @export update_labelled.labelled <- function(x) { # update only previous labelled class, but not objects from Hmisc if (!is.null(attr(x, "labels", exact = TRUE))) { if (is.null(attr(x, "na_values", exact = TRUE)) && is.null(attr(x, "na_range", exact = TRUE))) { x <- labelled( x, labels = attr(x, "labels", exact = TRUE), label = attr(x, "label", exact = TRUE) ) } else { x <- labelled_spss( x, na_values = attr(x, "na_values", exact = TRUE), na_range = attr(x, "range", exact = TRUE), labels = attr(x, "labels", exact = TRUE), label = attr(x, "label", exact = TRUE) ) } } x } #' @rdname update_labelled #' @export update_labelled.haven_labelled_spss <- function(x) { labelled_spss( x, labels = val_labels(x), label = var_label(x), na_values = na_values(x), na_range = na_range(x) ) } #' @rdname update_labelled #' @export update_labelled.haven_labelled <- function(x) { labelled( x, labels = val_labels(x), label = var_label(x) ) } #' @rdname update_labelled #' @export update_labelled.data.frame <- function(x) { x[] <- lapply(x, update_labelled) x } labelled/R/is_prefixed.R0000644000176200001440000000111614357761455014663 0ustar liggesusers#' Check if a factor is prefixed #' @param x a factor #' @export is_prefixed <- function(x) { if (!is.factor(x)) stop("is_prefixed should be used only with a factor.") l <- .get_prefixes.factor(x) all(!is.na(l$code)) && all(!is.na(l$code)) && !any(duplicated(l$code)) } # return a tibble with levels, code and label # if the factor is prefixed, otherwise NA .get_prefixes.factor <- function(x) { dplyr::tibble(levels = levels(x)) %>% tidyr::extract( "levels", c("code", "label"), "^\\[(.+)\\]\\s(.+)$", remove = FALSE ) } labelled/R/recode.R0000644000176200001440000000650014357761455013625 0ustar liggesusers#' Recode values #' #' Extend [dplyr::recode()] method from \pkg{dplyr} to #' works with labelled vectors. #' #' @importFrom dplyr recode #' @inheritParams dplyr::recode #' @param .keep_value_labels If TRUE, keep original value labels. #' If FALSE, remove value labels. #' @param .combine_value_labels If TRUE, will combine original value labels #' to generate new value labels. Note that unexpected results could be #' obtained if a same old value is recoded into several different new values. #' @param .sep Separator to be used when combining value labels. #' @seealso [dplyr::recode()] #' @examples #' x <- labelled(1:3, c(yes = 1, no = 2)) #' x #' dplyr::recode(x, `3` = 2L) #' #' # do not keep value labels #' dplyr::recode(x, `3` = 2L, .keep_value_labels = FALSE) #' #' # be careful, changes are not of the same type (here integers), #' # NA arecreated #' dplyr::recode(x, `3` = 2) #' #' # except if you provide .default or new values for all old values #' dplyr::recode(x, `1` = 1, `2` = 1,`3` = 2) #' #' # if you change the type of the vector (here transformed into character) #' # value labels are lost #' dplyr::recode(x, `3` = "b", .default = "a") #' #' # use .keep_value_labels = FALSE to avoid a warning #' dplyr::recode(x, `3` = "b", .default = "a", .keep_value_labels = FALSE) #' #' # combine value labels #' x <- labelled( #' 1:4, #' c( #' "strongly agree" = 1, #' "agree" = 2, #' "disagree" = 3, #' "strongly disagree" = 4) #' ) #' dplyr::recode( #' x, #' `1` = 1L, #' `2` = 1L, #' `3` = 2L, #' `4` = 2L, #' .combine_value_labels = TRUE #' ) #' dplyr::recode( #' x, #' `2` = 1L, #' `4` = 3L, #' .combine_value_labels = TRUE #' ) #' dplyr::recode( #' x, #' `2` = 1L, #' `4` = 3L, #' .combine_value_labels = TRUE, #' .sep = " or " #' ) #' dplyr::recode( #' x, #' `2` = 1L, #' .default = 2L, #' .combine_value_labels = TRUE #' ) #' #' # example when combining some values without a label #' y <- labelled(1:4, c("strongly agree" = 1)) #' dplyr::recode(y, `2` = 1L, `4` = 3L, .combine_value_labels = TRUE) #' @export recode.haven_labelled <- function( .x, ..., .default = NULL, .missing = NULL, .keep_value_labels = TRUE, .combine_value_labels = FALSE, .sep = " / " ) { ret <- dplyr::recode( .x = unclass(.x), ..., .default = .default, .missing = .missing ) if (mode(.x) == mode(ret)) { if (.keep_value_labels) { ret <- copy_labels(.x, ret) } if (.combine_value_labels) { ret <- copy_labels(.x, ret) old_vals <- unique(.x) new_vals <- c() for (o in old_vals) { new_vals <- c(new_vals, ret[.x == o][1]) } original_labels <- val_labels(.x) for (v in unique(new_vals)) { combined_label <- names( original_labels[original_labels %in% old_vals[new_vals == v]] ) if (length(combined_label) > 0) val_label(ret, v) <- paste(combined_label, collapse = .sep) } ret <- drop_unused_value_labels(ret) } } else { var_label(ret) <- var_label(.x) if (.keep_value_labels || .combine_value_labels) warning( "The type of .x has been changed and value labels attributes", "have been lost." ) } ret } labelled/NEWS.md0000644000176200001440000002153014444575565013140 0ustar liggesusers# labelled 2.12.0 **New features** * support of variable labels for packed columns, see dedicated vignette (#142) * new helpers `label_attribute()`, `get_label_attribute()` and `set_label_attribute()` to manipulate the "label" attribute on any object (#142) * new functions `get_variable_labels()`, `get_value_labels()`, `get_na_values()` and `get_na_range()` identical to `var_label()`, `val_labels()`, `na_values()` and `na_range()`, respectively * `to_character()` method for data frames (#140) # labelled 2.11.0 **Improvements** * `set_value_labels()`, `add_value_labels()`, `remove_value_labels()`, `set_variable_labels()`, `set_na_range()` and `set_na_values()` can now be applied on a vector (#126) * new argument `null_action` for `var_label()` when applied on a data frame (#131) * `look_for()` now returns `"missing"` (number of `NA`s) by default (#133) **Bug fixes** * bug fix in `print.look_for()` (#135) * bug fix in `unlabelled()` for classic vectors, now remained unchanged (#137) # labelled 2.10.0 * `look_for()` now accepts `survey` objects (#121) # labelled 2.9.1 * improved error messages for missing variable names (#118, @ajb5d) * better implementation of `look_for()` when no keyword is provided (#116) * bug fix in `user_na_to_tagged_na()` (#114) # labelled 2.9.0 **look_for() improvements:** * new function `look_for_and_select()` (#87) * `look_for()` can now search within factor levels and value labels (#104) **improvements for tagged NAs:** * better printing of value labels (#89) * new functions `user_na_to_tagged_na()`, `tagged_na_to_user_na()` and `tagged_na_to_regular_na()` * new option `explicit_tagged_na` in `to_factor()` and `to_character()` * new functions `unique_tagged_na()`, `duplicated_tagged_na()`, `order_tagged_na()`, `sort_tagged_na()` (#90, #91) **other improvements:** * new functions `is_user_na()` and `is_regular_na()` * new set of unit tests (#99) * trying to apply a value label, `na_range()` or `na_values()` to a factor will now produce an error * bug fix in `foreign_to_labelled()` for Stata files (#100) # labelled 2.8.0 * new helper `recode_if()` for recoding values based on condition, variable and value labels being preserved (#82) * `look_for()` could be time consuming for big data frames. Now, by default, only basic details of each variable are computed. You can compute all details with `details = "full"` (#77) * printing of `look_for()` results has been updated and do not rely anymore on `pillar` (#85) * `to_labelled()` can properly manage factors whose levels are coded as "[code] level", as produced by `to_factor(levels = "prefixed")` (#74 @courtiol) * new function `is_prefixed()` to check if a factor is prefixed * bug fix for `na_range<-` and `na_values<-` when applied to a data.frame (#80) # labelled 2.7.0 * a `.values` argument has been added to `set_na_values()` and `set_na_range()`, allowing to pass a list of values * a `.strict` option has been added to `set_variable_labels()`, `set_value_labels()`, `add_value_labels()`, `remove_value_labels()`, `set_na_values()` and `set_na_range()`, allowing to pass values for columns not observed in the data (it could be useful for using a same list of labels for several data.frame sharing some variables) (#70) * `copy_labels()` is less restrictive for non labelled vectors, copying variable label even if the two vectors are not of the same type (#71) * a `.strict` option has been added to `copy_labels()` (#71) # labelled 2.6.0 * `look_for()` has been redesigned: - `look_for()` now returns a tibble - columns with multiple values for each variable are now stored as named lists - a print method has been added for a clearer presentation of results in the console - use `lookfor_to_long_format()` to convert results with one row per factor level and per value label - use `convert_list_columns_to_character()` to convert list columns to simpler character vectors - `generate_dictionary()` is an equivalent of `look_for()` * `set_variable_labels`, `set_value_labels`, `add_value_labels`, and `remove_value_labels` now accept "tidy dots" (#67 @psanker) * new function `names_prefixed_by_values()` to get the names of a vector prefixed by their corresponding value # labelled 2.5.0 * new `.keep_value_labels` argument for `recode.haven_labelled()` * new `.combine_value_labels` argument for `recode.haven_labelled()` (#61) * new `drop_unused_value_labels()` method * an additional `.labels` argument for `set_value_labels()` * `user_na_to_na` argument has been added to `to_character.haven_labelled()` * `%>%` is now imported from `dplyr` * a cheatsheet has been added (#47) * internal documentation is now using **roxygen2** markdown support # labelled 2.4.0 * fixes for haven 2.3.0 (#59 by @hadley) * correct re-export of functions from `haven` * `update_labelled()` has been improved to allow to reconstruct all labelled vectors created with a previous version of `haven` # labelled 2.3.1 * an additional argument `keep_var_label` for `remove_labels()` * bug fix for `unlabelled()` when applied on a vector * when using `unclass = TRUE` with `to_factor()`, attributes are not removed anymore # labelled 2.3.0 * new function `unlabelled()` # labelled 2.2.2 * bug fix for `look_for()` (#52 by @NoahMarconi) * bug fix in `val_labels_to_na()` documentation # labelled 2.2.1 * bug fix for `na_range()` and `na_values()`: variable labels are now preserved (#48, thanks to @mspittler) # labelled 2.2.0 * new function `copy_labels_from()`, compliant with `dplyr` syntax * `update_labelled()` is now more strict (#42 by @iago-pssjd) * new functions `look_for()` and `lookfor()` imported from `questionr` (#44) * new `unlist` option for `var_label()` * `tagged_na()` and similar functions are now imported from `haven` # labelled 2.1.0 * `var_label()`, applied to a data.frame, now accepts a character vector of same length as the number of columns. * `set_variable_labels` has a new `.labels` argument. * New `unclass` option in `to_factor()`, to be used when `strict = TRUE` (#36) * Following `haven` version 2.1.0, it is not mandatory anymore to define a value label before defining a SPSS style missing value. `labelled_spss()`, `na_values()` and `na_range()` have been updated accordingly (#37) # labelled 2.0.2 * `to_factor()` bug fix then applied on a data.frame (#33) # labelled 2.0.1 * `update_labelled()` bug fix then applied on a data.frame (#31) # labelled 2.0.0 ## BREAKING CHANGE * Following version 2.0.0 of `haven`, `labelled()` and `labelled_spss()` now produce objects with class "haven_labelled" and "haven_labelled_spss", due to conflict between the previous "labelled" class and the "labelled" class used by `Hmisc`. * A new function `update_labelled()` could be used to convert data imported with an older version of `haven` to the new classes. ## Other changes * `user_na_to_na` option added to `to_factor()` * `foreign_to_labelled()` now import SPSS missing values (#27) * a `strict` argument added to `to_factor()` (#25) * `remove_attributes()` preserve character vectors (#30) # labelled 1.1.0 * extend `dplyr::recode()` method to be compatible with labelled vectors. * `copy_labels()` now copy also `na_range` and `na_values` attributes. * new method `remove_attributes()` # labelled 1.0.1 * bug fix: argument `drop_unused_labels` could now be used with `to_factor.data.frame()` * new labels argument for `to_labelled()` method when applied to a factor * bug fix: appropriate column names with `data.frame` (#20) # labelled 1.0.0 * now imports `haven` * new function to deal with user defined missing values (SPSS style): `na_values()`, `na_range()`, `set_na_values()`, `set_na_values()`, `remove_user_na()`, `user_na_to_na()`. * `remove_labels()` has been updated. # labelled 0.2.3 * new functions `set_variable_labels()`, `set_value_labels()`, `add_value_labels()` and `remove_value_labels()` compatible with `%>%`. * new functions `remove_val_labels` and `remove_var_label()`. * bug fix in `to_character.labelled()` when applied to data frames. # labelled 0.2.2 * `to_factor()`, `to_character()` and `to_labelled.factor()` now preserves variable label. * bug fix in `to_factor()` when applied to data frames. # labelled 0.2.0 * Following evolution of `haven`, `labelled` doesn't support missing values anymore (cf. https://github.com/hadley/haven/commit/4b12ff9d51ddb9e7486966b85e0bcff44992904d) * New function `to_character()` (cf. https://github.com/larmarange/labelled/commit/3d32852587bb707d06627e56407eed1c9d5a49de) * `to_factor()` could now be applied to a data.frame (cf. https://github.com/larmarange/labelled/commit/ce1d750681fe0c9bcd767cb83a8d72ed4c5fc5fb) * If `data.table` is available, labelled attribute are now changed by _reference_ (cf. https://github.com/larmarange/labelled/commit/c8b163f706122844d798e6625779e8a65e5bbf41) * `zap_labels()` added as a synonym of `remove_labels()` labelled/MD50000644000176200001440000001220514444603422012331 0ustar liggesusers3870e269c9c7690d2ac0b1861620753b *DESCRIPTION 760766f7fbc7f7e99dc743281d8ce2e3 *NAMESPACE a6b8bacc3ddce64969edc0393e58595c *NEWS.md d081f067d78a39f4d681f3fe3c11ec96 *R/copy_labels.R 09ee363d0780dd1bac729153253b9c38 *R/data.R 91179eef650f9f96920444a366a694f3 *R/drop_unused_value_labels.R f4a7239ec512ebec16fdb3fd4b8d3496 *R/is_prefixed.R c2cd8ed6abbe8bc30677449e7a126489 *R/labelled-package.R c159cbecf3d962eea36acf1f459a9a53 *R/labelled.R fa4597cd045f6413cf8b1454c75c2c20 *R/lookfor.R 61d99323bd7d7a5c388396ce264c693c *R/na_values.R 62d8c368578b2c3ea05ccb4fc53bc000 *R/recode.R 452f20d52b3cbc5ce4ea80055d647550 *R/recode_if.R 61059bc556df0a6a2b7e1616f05b53ba *R/remove_attributes.R b9a701cfb2176da92c272000a5d4f0a5 *R/remove_labels.R 4d3628aca472f45c006e03cdb8d3d65f *R/retrocompatibility.R a61976b45917d4f79d5da46f3358ff5c *R/tagged_na.R d4c6e910e4ee8024a3c8b3efd9098541 *R/to_character.R a9da8a57cb942eb31d6197fb3fb15e97 *R/to_factor.R 4cbc144eb0d9a9e46dffe681f1f216f5 *R/to_labelled.R 066a1283137bc0c476bfafc92bea2a26 *R/to_na.R 5f981b3ab98bea111df6d0c971cdb572 *R/val_labels.R eb14f3e2a1513ef601408070a7cd7696 *R/var_label.R 36bf7a93234bc412d4a240df7247f6b3 *README.md 817581eb95221a07ae306cc4757ccb02 *build/vignette.rds fd37eb471c738491c60f1b134fc52db7 *data/dta_file.rda 8141b7f85503c9012d892c4f73d05b80 *data/spss_file.rda a0ca560210ed4b201c51fb398692d305 *data/x_haven_2.0.rda cc704a75d2ec00eb4ce538756df69ef3 *data/x_spss_haven_2.0.rda c0e02346da389dff2c26734c10b488c3 *inst/WORDLIST dcadd9355db9e7cce9cedeceaf528344 *inst/doc/intro_labelled.R 9314b3b3ffa2e978648d33fb3368df63 *inst/doc/intro_labelled.Rmd a1fa0c84c6545db9283b50e00beaf251 *inst/doc/intro_labelled.html 6c1abd8a6767b991cf9e9afdbc39b442 *inst/doc/look_for.R f70fb7c076f25b00eedd8c34d616d4ce *inst/doc/look_for.Rmd 692fa0aee1122556562fee358a2ee95f *inst/doc/look_for.html 0f8ffaa08d83e7df6b59fa01f9ecebf3 *inst/doc/missing_values.R fb65203c6b65ea639cbf03e2d54d8ddb *inst/doc/missing_values.Rmd 160c8bb137977d626aad6e80df2693ad *inst/doc/missing_values.html 5ea8171f36373f71f3c230c8f87878de *inst/doc/packed_columns.R 4b51d8deaea2a430691d1f8efb27fc90 *inst/doc/packed_columns.Rmd 68edc4862abd170214704177363996d5 *inst/doc/packed_columns.html b0ed884aa3396a970665fe457a14a90f *man/copy_labels.Rd e4ad038e5f05d0dc17ec00ce216e517d *man/drop_unused_value_labels.Rd 051f5a77298a1855400207ca4f9fb27f *man/figures/labelled.png 557b96e5590bbe43a2ab6029003dacb1 *man/figures/labelled.svg cb1e46f469cfbbbde29c8b5113e1d789 *man/figures/lifecycle-archived.svg c0d2e5a54f1fa4ff02bf9533079dd1f7 *man/figures/lifecycle-defunct.svg a1b8c987c676c16af790f563f96cbb1f *man/figures/lifecycle-deprecated.svg c3978703d8f40f2679795335715e98f4 *man/figures/lifecycle-experimental.svg 952b59dc07b171b97d5d982924244f61 *man/figures/lifecycle-maturing.svg 27b879bf3677ea76e3991d56ab324081 *man/figures/lifecycle-questioning.svg 6902bbfaf963fbc4ed98b86bda80caa2 *man/figures/lifecycle-soft-deprecated.svg 53b3f893324260b737b3c46ed2a0e643 *man/figures/lifecycle-stable.svg 1c1fe7a759b86dc6dbcbe7797ab8246c *man/figures/lifecycle-superseded.svg fe321cd6e9909debb62538377ef60bca *man/is_prefixed.Rd 92eb57c037d1464534411070e0476a00 *man/look_for.Rd cf38f59bf77f3b235c8113cf138c3897 *man/na_values.Rd 63c7200e2fa1ce957e0bc547e12f8bb6 *man/names_prefixed_by_values.Rd 5a4cd9d686f9b27f472f73fa510d0098 *man/nolabel_to_na.Rd 83edd3b77b77c5976d238443ee2569c8 *man/recode.haven_labelled.Rd e8b172f4ce3a965255cf9926e60ce57a *man/recode_if.Rd c75242e227838467bcf2357580a22a7f *man/reexports.Rd 5563c51d23c6816d1a796f8da6e2fb2e *man/remove_attributes.Rd e6c325bc25e8cf4b5187067d88013eae *man/remove_labels.Rd 6441aa9209cf7153acf1e9cef7c478cd *man/sort_val_labels.Rd 732566c53152f8e6bfe1a1df648ed4d6 *man/tagged_na_to_user_na.Rd cbeeab50b3f169b8795016f2247125f6 *man/test_datasets.Rd fcc552cf209b9ebe5cca3d99ada2f8d8 *man/to_character.Rd e48ef964233b4134bea1d4cde09fd465 *man/to_factor.Rd f5e836ef49cd8688b289826cb58866cf *man/to_labelled.Rd 762668221135d56731a6c171ae46addf *man/unique_tagged_na.Rd b6d620d00c03c1eb6f372bd257f2b1e9 *man/update_labelled.Rd 48a998ffae3b566702a4d095d2def115 *man/val_labels.Rd 40461d1267a071fec39efce62830b231 *man/val_labels_to_na.Rd 093a6cf0db668cf2fdd76d25be84d8cf *man/var_label.Rd 1b7ff84c1f3d4ea54194c6b7424b930a *tests/spelling.R a86008d3e4da9fb1879621ca265a6039 *tests/testthat.R c4717b1732f8ddf780051c15c230a218 *tests/testthat/test-copy_labels.r 5d2518cd9a0924f7d3ccbb9141f4a0a4 *tests/testthat/test-labelled.r d17ed73198adf80b7042bbdad2492bae *tests/testthat/test-miscellanous.R 1fdd1c69545c5f753559131ed30cf4a4 *tests/testthat/test-na_values.R 6616413b768d8d942286d55f88f75ab1 *tests/testthat/test-recode_if.r 811517d9b78ddab30f8beaffa4490faf *tests/testthat/test-tagged_na.r 41ae01780abcf903f745095d7175aa34 *tests/testthat/test-to_labelled.r 7ef2e50744d942e4c34c1a179fa8670d *tests/testthat/test_lookfor.R 06c65f6d55ce89acddd6264225d28355 *vignettes/approaches.drawio d184c748e7df6de59878897889003dad *vignettes/approaches.png 9314b3b3ffa2e978648d33fb3368df63 *vignettes/intro_labelled.Rmd f70fb7c076f25b00eedd8c34d616d4ce *vignettes/look_for.Rmd fb65203c6b65ea639cbf03e2d54d8ddb *vignettes/missing_values.Rmd 4b51d8deaea2a430691d1f8efb27fc90 *vignettes/packed_columns.Rmd labelled/inst/0000755000176200001440000000000014444575741013012 5ustar liggesuserslabelled/inst/doc/0000755000176200001440000000000014444575742013560 5ustar liggesuserslabelled/inst/doc/intro_labelled.Rmd0000644000176200001440000003663414357761455017220 0ustar liggesusers--- author: "Joseph Larmarange" title: "Introduction to labelled" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Introduction to labelled} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The purpose of the **labelled** package is to provide functions to manipulate metadata as variable labels, value labels and defined missing values using the `haven_labelled` and `haven_labelled_spss` classes introduced in `haven` package. These classes allow to add metadata (variable, value labels and SPSS-style missing values) to vectors. It should be noted that **value labels** doesn't imply that your vectors should be considered as categorical or continuous. Therefore, value labels are not intended to be use for data analysis. For example, before performing modeling, you should convert vectors with value labels into factors or into classic numeric/character vectors. Therefore, two main approaches could be considered. ![Two main approaches](approaches.png){width=100%} In **approach A**, `haven_labelled` vectors are converted into factors or into numeric/character vectors just after data import, using `unlabelled()`, `to_factor()` or `unclass()`. Then, data cleaning, recoding and analysis are performed using classic **R** vector types. In **approach B**, `haven_labelled` vectors are kept for data cleaning and coding, allowing to preserved original recoding, in particular if data should be reexported after that step. Functions provided by `labelled` will be useful for managing value labels. However, as in approach A, `haven_labelled` vectors will have to be converted into classic factors or numeric vectors before data analysis (in particular modeling) as this is the way categorical and continuous variables should be coded for analysis functions. ## Variable labels A variable label could be specified for any vector using `var_label()`. ```{r} library(labelled) var_label(iris$Sepal.Length) <- "Length of sepal" ``` It's possible to add a variable label to several columns of a data frame using a named list. ```{r} var_label(iris) <- list( Petal.Length = "Length of petal", Petal.Width = "Width of Petal" ) ``` To get the variable label, simply call `var_label()`. ```{r} var_label(iris$Petal.Width) var_label(iris) ``` To remove a variable label, use `NULL`. ```{r} var_label(iris$Sepal.Length) <- NULL ``` In **RStudio**, variable labels will be displayed in data viewer. ```{r, eval=FALSE} View(iris) ``` You can display and search through variable names and labels with `look_for()`: ```{r} look_for(iris) look_for(iris, "pet") look_for(iris, details = FALSE) ``` ## Value labels The first way to create a labelled vector is to use the `labelled()` function. It's not mandatory to provide a label for each value observed in your vector. You can also provide a label for values not observed. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v ``` Use `val_labels()` to get all value labels and `val_label()` to get the value label associated with a specific value. ```{r} val_labels(v) val_label(v, 8) ``` `val_labels()` could also be used to modify all the value labels attached to a vector, while `val_label()` will update only one specific value label. ```{r} val_labels(v) <- c(yes = 1, nno = 3, bug = 5) v val_label(v, 3) <- "no" v ``` With `val_label()`, you can also add or remove specific value labels. ```{r} val_label(v, 2) <- "maybe" val_label(v, 5) <- NULL v ``` To remove all value labels, use `val_labels()` and `NULL`. The `haven_labelled` class will also be removed. ```{r} val_labels(v) <- NULL v ``` Adding a value label to a non labelled vector will apply `haven_labelled` class to it. ```{r} val_label(v, 1) <- "yes" v ``` Note that applying `val_labels()` to a factor will generate an error! ```{r, error = TRUE} f <- factor(1:3) f val_labels(f) <- c(yes = 1, no = 3) ``` You could also apply `val_labels()` to several columns of a data frame. ```{r} df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1) val_label(df, 1) <- "yes" val_label(df[, c("v1", "v3")], 2) <- "maybe" val_label(df[, c("v2", "v3")], 3) <- "no" val_labels(df) val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3) val_labels(df) val_labels(df) <- NULL val_labels(df) val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3)) val_labels(df) ``` ## Sorting value labels Value labels are sorted by default in the order they have been created. ```{r} v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA) val_label(v, 1) <- "yes" val_label(v, 3) <- "no" val_label(v, 9) <- "refused" val_label(v, 2) <- "maybe" val_label(v, 8) <- "don't know" v ``` It could be useful to reorder the value labels according to their attached values, with `sort_val_labels()`. ```{r} sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) ``` If you prefer, you can also sort them according to the labels. ```{r} sort_val_labels(v, according_to = "l") ``` ## User defined missing values (SPSS's style) `haven` (>= 2.0.0) introduced an additional `haven_labelled_spss` class to deal with user defined missing values. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. It is possible to manipulate this missing values with `na_values()` and `na_range()`. Note that `is.na()` will return `TRUE` as well for user-defined missing values. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` Since version 2.1.0, it is not mandatory to define at least one value label before defining missing values. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 x ``` To convert user defined missing values into `NA`, simply use `user_na_to_na()`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- user_na_to_na(v) v2 ``` You can also remove user missing values definition without converting these values to `NA`. ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- remove_user_na(v) v2 ``` or ```{r} v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v na_values(v) <- NULL v ``` ## Other conversion to NA In some cases, values who don't have an attached value label could be considered as missing. `nolabel_to_na()` will convert them to `NA`. ```{r} v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3)) v nolabel_to_na(v) ``` In other cases, a value label is attached only to specific values that corresponds to a missing value. For example: ```{r} size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99)) size ``` In such cases, `val_labels_to_na()` could be appropriate. ```{r} val_labels_to_na(size) ``` These two functions could also be applied to an overall data frame. Only labelled vectors will be impacted. ## Converting to factor A labelled vector could easily be converted to a factor with `to_factor()`. ```{r} v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v to_factor(v) ``` The `levels` argument allows to specify what should be used as the factor levels, i.e. the labels (default), the values or the labels prefixed with values. ```{r} to_factor(v, levels = "v") to_factor(v, levels = "p") ``` The `ordered` argument will create an ordinal factor. ```{r} to_factor(v, ordered = TRUE) ``` The argument `nolabel_to_na` specify if the corresponding function should be applied before converting to a factor. Therefore, the two following commands are equivalent. ```{r} to_factor(v, nolabel_to_na = TRUE) to_factor(nolabel_to_na(v)) ``` `sort_levels` specifies how the levels should be sorted: `"none"` to keep the order in which value labels have been defined, `"values"` to order the levels according to the values and `"labels"` according to the labels. `"auto"` (default) will be equivalent to `"none"` except if some values with no attached labels are found and are not dropped. In that case, `"values"` will be used. ```{r} to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "l") ``` The function `to_labelled()` could be used to turn a factor into a labelled numeric vector. ```{r} f <- factor(1:3, labels = c("a", "b", "c")) to_labelled(f) ``` Note that `to_labelled(to_factor(v))` will not be equal to `v` due to the way factors are stored internally by **R**. ```{r} v to_labelled(to_factor(v)) ``` ## Other type of conversions You can use `to_character()` for converting into a character vector instead of a factor. ```{r} v to_character(v) ``` To remove the `haven_class`, you can simply use `unclass()`. ```{r} unclass(v) ``` Note that value labels will be preserved as an attribute to the vector. ```{r} remove_val_labels(v) ``` To remove value labels, use `remove_val_labels()`. ```{r} remove_val_labels(v) ``` Note that if your vector does have user-defined missing values, you may also want to use `remove_user_na()`. ```{r} x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" x remove_val_labels(x) remove_user_na(x) remove_user_na(x, user_na_to_na = TRUE) remove_val_labels(remove_user_na(x)) unclass(x) ``` You can remove all labels and user-defined missing values with `remove_labels()`. Use `keep_var_label = TRUE` to preserve only variable label. ```{r} remove_labels(x, user_na_to_na = TRUE) remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE) ``` ## Conditional conversion to factors{#unlabelled} For any analysis, it is the responsibility of user to identify which labelled numeric vectors should be considered as **categorical** (and therefore converted into factors using `to_factor()`) and which variables should be treated as **continuous** (and therefore unclassed into numeric using `base::unclass()`). It should be noted that most functions expect categorical variables to be coded as factors. It includes most modeling functions (such as `stats::lm()` or `stats::glm()`) or plotting functions from `ggplot2`. In most of cases, if data documentation was properly done, categorical variables corresponds to vectors where all observed values have a value label while vectors where only few values have a value label should be considered as continuous. In that situation, you could apply the `unlabelled()` method to an overall data frame. By default, `unlabelled()` works as follow: - if a column doesn't inherit the `haven_labelled` class, it will be not affected; - if all observed values have a corresponding value label, the column will be converted into a factor (using `to_factor()`); - otherwise, the column will be unclassed (and converted back to a numeric or character vector by applying `base::unclass()`). ```{r} df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)), d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), e = labelled_spss( c(1, 9, 1, 2), labels = c(No = 1, Yes = 2), na_values = 9 ) ) df %>% look_for() unlabelled(df) %>% look_for() unlabelled(df, user_na_to_na = TRUE) %>% look_for() unlabelled(df, drop_unused_labels = TRUE) %>% look_for() ``` ## Importing labelled data In **haven** package, `read_spss`, `read_stata` and `read_sas` are natively importing data using the `labelled` class and the `label` attribute for variable labels. Functions from **foreign** package could also import some metadata from **SPSS** and **Stata** files. `to_labelled` can convert data imported with **foreign** into a labelled data frame. However, there are some limitations compared to using **haven**: - For **SPSS** files, it will be better to set `use.value.labels = FALSE`, `to.data.frame = FALSE` and `use.missings = FALSE` when calling `read.spss`. If `use.value.labels = TRUE`, variable with value labels will be converted into factors by `read.spss` (and kept as factors by `foreign_to_label`). If `to.data.frame = TRUE`, meta data describing the missing values will not be imported. If `use.missings = TRUE`, missing values would have been converted to `NA` by `read.spss`. - For **Stata** files, set `convert.factors = FALSE` when calling `read.dta` to avoid conversion of variables with value labels into factors. So far, missing values defined in Stata are always imported as `NA` by `read.dta` and could not be retrieved by `foreign_to_labelled`. The **memisc** package provide functions to import variable metadata and store them in specific object of class `data.set`. The `to_labelled` method can convert a data.set into a labelled data frame. ```{r, eval=FALSE} # from foreign library(foreign) df <- to_labelled(read.spss( "file.sav", to.data.frame = FALSE, use.value.labels = FALSE, use.missings = FALSE )) df <- to_labelled(read.dta( "file.dta", convert.factors = FALSE )) # from memisc library(memisc) nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") nes1948 <- spss.portable.file(nes1948.por) df <- to_labelled(nes1948) ds <- as.data.set(nes19480) df <- to_labelled(ds) ``` ## Using labelled with dplyr/magrittr If you are using the `%>%` operator, you can use the functions `set_variable_labels()`, `set_value_labels()`, `add_value_labels()` and `remove_value_labels()`. ```{r} library(dplyr) df <- data_frame(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% set_variable_labels(s1 = "Sex", s2 = "Question") %>% set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2)) df$s2 ``` `set_value_labels()` will replace the list of value labels while `add_value_labels()` will update it. ```{r} df <- df %>% set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9)) df$s2 df <- df %>% add_value_labels(s2 = c(No = 2)) df$s2 ``` You can also remove some variable and/or value labels. ```{r} df <- df %>% set_variable_labels(s1 = NULL) # removing one value label df <- df %>% remove_value_labels(s2 = 2) df$s2 # removing several value labels df <- df %>% remove_value_labels(s2 = 8:9) df$s2 # removing all value labels df <- df %>% set_value_labels(s2 = NULL) df$s2 ``` To convert variables, the easiest is to use `unlabelled()`. ```{r} library(questionr) data(fertility) glimpse(women) glimpse(women %>% unlabelled()) ``` Alternatively, you can use functions as `dplyr::mutate_if()` or `dplyr::mutate_at()`. See the example below. ```{r} glimpse(to_factor(women)) glimpse(women %>% mutate_if(is.labelled, to_factor)) glimpse(women %>% mutate_at(vars(employed:religion), to_factor)) ``` labelled/inst/doc/intro_labelled.R0000644000176200001440000002125314444575735016667 0ustar liggesusers## ----------------------------------------------------------------------------- library(labelled) var_label(iris$Sepal.Length) <- "Length of sepal" ## ----------------------------------------------------------------------------- var_label(iris) <- list( Petal.Length = "Length of petal", Petal.Width = "Width of Petal" ) ## ----------------------------------------------------------------------------- var_label(iris$Petal.Width) var_label(iris) ## ----------------------------------------------------------------------------- var_label(iris$Sepal.Length) <- NULL ## ---- eval=FALSE-------------------------------------------------------------- # View(iris) ## ----------------------------------------------------------------------------- look_for(iris) look_for(iris, "pet") look_for(iris, details = FALSE) ## ----------------------------------------------------------------------------- v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v ## ----------------------------------------------------------------------------- val_labels(v) val_label(v, 8) ## ----------------------------------------------------------------------------- val_labels(v) <- c(yes = 1, nno = 3, bug = 5) v val_label(v, 3) <- "no" v ## ----------------------------------------------------------------------------- val_label(v, 2) <- "maybe" val_label(v, 5) <- NULL v ## ----------------------------------------------------------------------------- val_labels(v) <- NULL v ## ----------------------------------------------------------------------------- val_label(v, 1) <- "yes" v ## ---- error = TRUE------------------------------------------------------------ f <- factor(1:3) f val_labels(f) <- c(yes = 1, no = 3) ## ----------------------------------------------------------------------------- df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1) val_label(df, 1) <- "yes" val_label(df[, c("v1", "v3")], 2) <- "maybe" val_label(df[, c("v2", "v3")], 3) <- "no" val_labels(df) val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3) val_labels(df) val_labels(df) <- NULL val_labels(df) val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3)) val_labels(df) ## ----------------------------------------------------------------------------- v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA) val_label(v, 1) <- "yes" val_label(v, 3) <- "no" val_label(v, 9) <- "refused" val_label(v, 2) <- "maybe" val_label(v, 8) <- "don't know" v ## ----------------------------------------------------------------------------- sort_val_labels(v) sort_val_labels(v, decreasing = TRUE) ## ----------------------------------------------------------------------------- sort_val_labels(v, according_to = "l") ## ----------------------------------------------------------------------------- v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9) ) v na_values(v) <- 9 na_values(v) v is.na(v) na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ## ----------------------------------------------------------------------------- x <- c(1, 2, 2, 9) na_values(x) <- 9 x ## ----------------------------------------------------------------------------- v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- user_na_to_na(v) v2 ## ----------------------------------------------------------------------------- v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v v2 <- remove_user_na(v) v2 ## ----------------------------------------------------------------------------- v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10)) v na_values(v) <- NULL v ## ----------------------------------------------------------------------------- v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3)) v nolabel_to_na(v) ## ----------------------------------------------------------------------------- size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99)) size ## ----------------------------------------------------------------------------- val_labels_to_na(size) ## ----------------------------------------------------------------------------- v <- labelled( c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 8, refused = 9) ) v to_factor(v) ## ----------------------------------------------------------------------------- to_factor(v, levels = "v") to_factor(v, levels = "p") ## ----------------------------------------------------------------------------- to_factor(v, ordered = TRUE) ## ----------------------------------------------------------------------------- to_factor(v, nolabel_to_na = TRUE) to_factor(nolabel_to_na(v)) ## ----------------------------------------------------------------------------- to_factor(v, sort_levels = "n") to_factor(v, sort_levels = "v") to_factor(v, sort_levels = "l") ## ----------------------------------------------------------------------------- f <- factor(1:3, labels = c("a", "b", "c")) to_labelled(f) ## ----------------------------------------------------------------------------- v to_labelled(to_factor(v)) ## ----------------------------------------------------------------------------- v to_character(v) ## ----------------------------------------------------------------------------- unclass(v) ## ----------------------------------------------------------------------------- remove_val_labels(v) ## ----------------------------------------------------------------------------- remove_val_labels(v) ## ----------------------------------------------------------------------------- x <- c(1, 2, 2, 9) na_values(x) <- 9 val_labels(x) <- c(yes = 1, no = 2) var_label(x) <- "A test variable" x remove_val_labels(x) remove_user_na(x) remove_user_na(x, user_na_to_na = TRUE) remove_val_labels(remove_user_na(x)) unclass(x) ## ----------------------------------------------------------------------------- remove_labels(x, user_na_to_na = TRUE) remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE) ## ----------------------------------------------------------------------------- df <- data.frame( a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)), b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)), c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)), d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")), e = labelled_spss( c(1, 9, 1, 2), labels = c(No = 1, Yes = 2), na_values = 9 ) ) df %>% look_for() unlabelled(df) %>% look_for() unlabelled(df, user_na_to_na = TRUE) %>% look_for() unlabelled(df, drop_unused_labels = TRUE) %>% look_for() ## ---- eval=FALSE-------------------------------------------------------------- # # from foreign # library(foreign) # df <- to_labelled(read.spss( # "file.sav", # to.data.frame = FALSE, # use.value.labels = FALSE, # use.missings = FALSE # )) # df <- to_labelled(read.dta( # "file.dta", # convert.factors = FALSE # )) # # # from memisc # library(memisc) # nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc") # nes1948 <- spss.portable.file(nes1948.por) # df <- to_labelled(nes1948) # ds <- as.data.set(nes19480) # df <- to_labelled(ds) ## ----------------------------------------------------------------------------- library(dplyr) df <- data_frame(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>% set_variable_labels(s1 = "Sex", s2 = "Question") %>% set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2)) df$s2 ## ----------------------------------------------------------------------------- df <- df %>% set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9)) df$s2 df <- df %>% add_value_labels(s2 = c(No = 2)) df$s2 ## ----------------------------------------------------------------------------- df <- df %>% set_variable_labels(s1 = NULL) # removing one value label df <- df %>% remove_value_labels(s2 = 2) df$s2 # removing several value labels df <- df %>% remove_value_labels(s2 = 8:9) df$s2 # removing all value labels df <- df %>% set_value_labels(s2 = NULL) df$s2 ## ----------------------------------------------------------------------------- library(questionr) data(fertility) glimpse(women) glimpse(women %>% unlabelled()) ## ----------------------------------------------------------------------------- glimpse(to_factor(women)) glimpse(women %>% mutate_if(is.labelled, to_factor)) glimpse(women %>% mutate_at(vars(employed:religion), to_factor)) labelled/inst/doc/packed_columns.html0000644000176200001440000005537714444575741017455 0ustar liggesusers Variables labels and packed columns

Variables labels and packed columns

Joseph Larmarange

The tidyr allows to group several columns of a tibble into one single df-column, see tidyr::pack(). Such df-column is itself a tibble. It’s not currently clear why you would ever want to pack columns since few functions work with this sort of data.

library(tidyr)
d <- iris %>%
  as_tibble() %>%
  pack(
    Sepal = starts_with("Sepal"),
    Petal = starts_with("Petal"),
    .names_sep = "."
  )
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "Length of petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "Width of Petal"
class(d$Sepal)
## [1] "tbl_df"     "tbl"        "data.frame"

Regarding variable labels, you may want to define a label for one sub-column of a df-column, or eventually a label for the df-column itself.

For a sub-column, you could use easily var_label() to define your label.

library(labelled)
var_label(d$Sepal$Length) <- "Length of the sepal"
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "Length of petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "Width of Petal"

But you cannot use directly var_label() for the df-column.

var_label(d$Petal) <- "wrong label for Petal"
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"

As d$Petal is itself a tibble, applying var_label() on it would have an effect on each sub-column. To change a variable label to the df-column itself, you could use label_attribute().

label_attribute(d$Petal) <- "correct label for Petal"
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..- attr(*, "label")= chr "correct label for Petal"

On the other hand, set_variable_labels() works differently, as the primary intention of this function is to work on the columns of a tibble.

d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column")
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##   ..- attr(*, "label")= chr "Label of the Sepal df-column"
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..- attr(*, "label")= chr "correct label for Petal"

This is equivalent to:

var_label(d) <- list(Sepal = "Label of the Sepal df-column")
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##   ..- attr(*, "label")= chr "Label of the Sepal df-column"
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "wrong label for Petal"
##   ..- attr(*, "label")= chr "correct label for Petal"

To use set_variable_labels() on sub-columns, you should use this syntax:

d$Petal <- d$Petal %>%
  set_variable_labels(
    Length = "Petal length",
    Width = "Petal width"
  )
str(d)
## tibble [150 × 3] (S3: tbl_df/tbl/data.frame)
##  $ Species: Factor w/ 3 levels "setosa","versicolor",..: 1 1 1 1 1 1 1 1 1 1 ...
##  $ Sepal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 5.1 4.9 4.7 4.6 5 5.4 4.6 5 4.4 4.9 ...
##   .. ..- attr(*, "label")= chr "Length of the sepal"
##   ..$ Width : num [1:150] 3.5 3 3.2 3.1 3.6 3.9 3.4 3.4 2.9 3.1 ...
##   ..- attr(*, "label")= chr "Label of the Sepal df-column"
##  $ Petal  : tibble [150 × 2] (S3: tbl_df/tbl/data.frame)
##   ..$ Length: num [1:150] 1.4 1.4 1.3 1.5 1.4 1.7 1.4 1.5 1.4 1.5 ...
##   .. ..- attr(*, "label")= chr "Petal length"
##   ..$ Width : num [1:150] 0.2 0.2 0.2 0.2 0.2 0.4 0.3 0.2 0.2 0.1 ...
##   .. ..- attr(*, "label")= chr "Petal width"
##   ..- attr(*, "label")= chr "correct label for Petal"

If you want to get the list of variable labels of a tibble, by default var_label() or get_variable_labels() will return the labels of the first level of columns.

d %>% get_variable_labels()
## $Species
## NULL
## 
## $Sepal
## [1] "Label of the Sepal df-column"
## 
## $Petal
## [1] "correct label for Petal"

To obtain the list of variable labels for sub-columns, you could use recurse = TRUE:

d %>% get_variable_labels(recurse = TRUE)
## $Species
## NULL
## 
## $Sepal
## $Sepal$Length
## [1] "Length of the sepal"
## 
## $Sepal$Width
## NULL
## 
## 
## $Petal
## $Petal$Length
## [1] "Petal length"
## 
## $Petal$Width
## [1] "Petal width"
d %>%
  get_variable_labels(
    recurse = TRUE,
    null_action = "fill",
    unlist = TRUE
  )
##               Species          Sepal.Length           Sepal.Width 
##             "Species" "Length of the sepal"               "Width" 
##          Petal.Length           Petal.Width 
##        "Petal length"         "Petal width"
labelled/inst/doc/packed_columns.R0000644000176200001440000000305014444575741016667 0ustar liggesusers## ----------------------------------------------------------------------------- library(tidyr) d <- iris %>% as_tibble() %>% pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) str(d) class(d$Sepal) ## ----------------------------------------------------------------------------- library(labelled) var_label(d$Sepal$Length) <- "Length of the sepal" str(d) ## ----------------------------------------------------------------------------- var_label(d$Petal) <- "wrong label for Petal" str(d) ## ----------------------------------------------------------------------------- label_attribute(d$Petal) <- "correct label for Petal" str(d) ## ----------------------------------------------------------------------------- d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") str(d) ## ----------------------------------------------------------------------------- var_label(d) <- list(Sepal = "Label of the Sepal df-column") str(d) ## ----------------------------------------------------------------------------- d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) str(d) ## ----------------------------------------------------------------------------- d %>% get_variable_labels() ## ----------------------------------------------------------------------------- d %>% get_variable_labels(recurse = TRUE) d %>% get_variable_labels( recurse = TRUE, null_action = "fill", unlist = TRUE ) labelled/inst/doc/missing_values.html0000644000176200001440000011101414444575740017472 0ustar liggesusers About missing values: regular NAs, tagged NAs and user NAs

About missing values: regular NAs, tagged NAs and user NAs

Joseph Larmarange

In base R, missing values are indicated using the specific value NA. Regular NAs could be used with any type of vector (double, integer, character, factor, Date, etc.).

Other statistical software have implemented ways to differentiate several types of missing values.

Stata and SAS have a system of tagged NAs, where NA values are tagged with a letter (from a to z). SPSS allows users to indicate that certain non-missing values should be treated in some analysis as missing (user NAs). The haven package implements tagged NAs and user NAs in order to keep this information when importing files from Stata, SAS or SPSS.

library(labelled)

Tagged NAs

Creation and tests

Tagged NAs are proper NA values with a tag attached to them. They can be created with tagged_na(). The attached tag should be a single letter, lowercase (a-z) or uppercase (A-Z).

x <- c(1:5, tagged_na("a"), tagged_na("z"), NA)

For most R functions, tagged NAs are just considered as regular NAs. By default, they are just printed as any other regular NA.

x
## [1]  1  2  3  4  5 NA NA NA
is.na(x)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE

To show/print their tags, you need to use na_tag(), print_tagged_na() or format_tagged_na().

na_tag(x)
## [1] NA  NA  NA  NA  NA  "a" "z" NA
print_tagged_na(x)
## [1]     1     2     3     4     5 NA(a) NA(z)    NA
format_tagged_na(x)
## [1] "    1" "    2" "    3" "    4" "    5" "NA(a)" "NA(z)" "   NA"

To test if a certain NA is a regular NA or a tagged NA, you should use is_regular_na() or is_tagged_na().

is.na(x)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE  TRUE
is_tagged_na(x)
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE  TRUE FALSE
# You can test for specific tagged NAs with the second argument
is_tagged_na(x, "a")
## [1] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE
is_regular_na(x)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

Tagged NAs could be defined only for double vectors. If you add a tagged NA to a character vector, it will be converted into a regular NA. If you add a tagged NA to an integer vector, the vector will be converted into a double vector.

y <- c("a", "b", tagged_na("z"))
y
## [1] "a" "b" NA
is_tagged_na(y)
## [1] FALSE FALSE FALSE
format_tagged_na(y)
## Error: `x` must be a double vector
z <- c(1L, 2L, tagged_na("a"))
typeof(z)
## [1] "double"
format_tagged_na(z)
## [1] "    1" "    2" "NA(a)"

Unique values, duplicates and sorting with tagged NAs

By default, functions such as base::unique(), base::duplicated(), base::order() or base::sort() will treat tagged NAs as the same thing as a regular NA. You can use unique_tagged_na(), duplicated_tagged_na(), order_tagged_na() and sort_tagged_na() as alternatives that will treat two tagged NAs with different tags as separate values.

x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA)
x %>% print_tagged_na()
## [1]     1     2 NA(a)     1 NA(z)     2 NA(a)    NA
unique(x) %>% print_tagged_na()
## [1]     1     2 NA(a)
unique_tagged_na(x) %>% print_tagged_na()
## [1]     1     2 NA(a) NA(z)    NA
duplicated(x)
## [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE  TRUE  TRUE
duplicated_tagged_na(x)
## [1] FALSE FALSE FALSE  TRUE FALSE  TRUE  TRUE FALSE
sort(x, na.last = TRUE) %>% print_tagged_na()
## [1]     1     1     2     2 NA(a) NA(z) NA(a)    NA
sort_tagged_na(x) %>% print_tagged_na()
## [1]     1     1     2     2 NA(a) NA(a) NA(z)    NA

Tagged NAs and value labels

It is possible to define value labels for tagged NAs.

x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA)
val_labels(x) <- c(
  no = 0, yes = 1,
  "don't know" = tagged_na("d"),
  refusal = tagged_na("r")
)
x
## <labelled<double>[8]>
## [1]     1     0     1 NA(r)     0 NA(d) NA(z)    NA
## 
## Labels:
##  value      label
##      0         no
##      1        yes
##  NA(d) don't know
##  NA(r)    refusal

When converting such labelled vector into factor, tagged NAs are, by default, converted into regular NAs (it is not possible to define tagged NAs with factors).

to_factor(x)
## [1] yes  no   yes  <NA> no   <NA> <NA> <NA>
## Levels: no yes

However, the option explicit_tagged_na of to_factor() allows to transform tagged NAs into explicit factor levels.

to_factor(x, explicit_tagged_na = TRUE)
## [1] yes        no         yes        refusal    no         don't know NA(z)     
## [8] <NA>      
## Levels: no yes don't know refusal NA(z)
to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE)
## [1] [1] yes            [0] no             [1] yes            [NA(r)] refusal   
## [5] [0] no             [NA(d)] don't know [NA(z)] NA(z)      <NA>              
## Levels: [0] no [1] yes [NA(d)] don't know [NA(r)] refusal [NA(z)] NA(z)

Conversion into user NAs

Tagged NAs can be converted into user NAs with tagged_na_to_user_na().

tagged_na_to_user_na(x)
## <labelled_spss<double>[8]>
## [1]  1  0  1  3  0  2  4 NA
## Missing range:  [2, 4]
## 
## Labels:
##  value      label
##      0         no
##      1        yes
##      2 don't know
##      3    refusal
##      4      NA(z)
tagged_na_to_user_na(x, user_na_start = 10)
## <labelled_spss<double>[8]>
## [1]  1  0  1 11  0 10 12 NA
## Missing range:  [10, 12]
## 
## Labels:
##  value      label
##      0         no
##      1        yes
##     10 don't know
##     11    refusal
##     12      NA(z)

Use tagged_na_to_regular_na() to convert tagged NAs into regular NAs.

tagged_na_to_regular_na(x)
## <labelled<double>[8]>
## [1]  1  0  1 NA  0 NA NA NA
## 
## Labels:
##  value label
##      0    no
##      1   yes
tagged_na_to_regular_na(x) %>% is_tagged_na()
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

User NAs

haven introduced an haven_labelled_spss class to deal with user defined missing values in a similar way as SPSS. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal NA values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into NA if required before analysis. These defined missing values could co-exist with internal NA values.

Creation

User NAs could be created directly with labelled_spss(). You can also manipulate them with na_values() and na_range().

v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9))
v
## <labelled<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_values(v) <- 9
v
## <labelled_spss<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## Missing values: 9
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_values(v) <- NULL
v
## <labelled<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_range(v) <- c(5, Inf)
na_range(v)
## [1]   5 Inf
v
## <labelled_spss<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## Missing range:  [5, Inf]
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know

NB: you cant also use set_na_range() and set_na_values() for a dplyr-like syntax.

library(dplyr)
# setting value labels and user NAs
df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>%
  set_value_labels(s2 = c(yes = 1, no = 2)) %>%
  set_na_values(s2 = 9)
df$s2
## <labelled_spss<double>[4]>
## [1] 1 1 2 9
## Missing values: 9
## 
## Labels:
##  value label
##      1   yes
##      2    no
# removing user NAs
df <- df %>% set_na_values(s2 = NULL)
df$s2
## <labelled<double>[4]>
## [1] 1 1 2 9
## 
## Labels:
##  value label
##      1   yes
##      2    no

Tests

Note that is.na() will return TRUE for user NAs. Use is_user_na() to test if a specific value is a user NA and is_regular_na() to test if it is a regular NA.

v
## <labelled_spss<double>[8]>
## [1]  1  2  3  9  1  3  2 NA
## Missing range:  [5, Inf]
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
is.na(v)
## [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
is_user_na(v)
## [1] FALSE FALSE FALSE  TRUE FALSE FALSE FALSE FALSE
is_regular_na(v)
## [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE  TRUE

Conversion

For most R functions, user NAs values are still regular values.

x <- c(1:5, 11:15)
na_range(x) <- c(10, Inf)
val_labels(x) <- c("dk" = 11, "refused" = 15)
x
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5 11 12 13 14 15
## Missing range:  [10, Inf]
## 
## Labels:
##  value   label
##     11      dk
##     15 refused
mean(x)
## [1] 8

You can convert user NAs into regular NAs with user_na_to_na() or user_na_to_regular_na() (both functions are identical).

user_na_to_na(x)
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5 NA NA NA NA NA
mean(user_na_to_na(x), na.rm = TRUE)
## [1] 3

Alternatively, if the vector is numeric, you can convert user NAs into tagged NAs with user_na_to_tagged_na().

user_na_to_tagged_na(x)
## 'x' has been converted into a double vector.
## <labelled<double>[10]>
##  [1]     1     2     3     4     5 NA(a) NA(b) NA(c) NA(d) NA(e)
## 
## Labels:
##  value   label
##  NA(a)      dk
##  NA(e) refused
mean(user_na_to_tagged_na(x), na.rm = TRUE)
## 'x' has been converted into a double vector.
## [1] 3

Finally, you can also remove user NAs definition without converting these values to NA, using remove_user_na().

remove_user_na(x)
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5 11 12 13 14 15
## 
## Labels:
##  value   label
##     11      dk
##     15 refused
mean(remove_user_na(x))
## [1] 8
labelled/inst/doc/look_for.html0000644000176200001440000013464314444575737016277 0ustar liggesusers Generate a data dictionnary and search for variables with look_for()

Generate a data dictionnary and search for variables with look_for()

Joseph Larmarange

Showing a summary of a data frame

Default printing of tibbles

It is a common need to easily get a description of all variables in a data frame.

When a data frame is converted into a tibble (e.g. with dplyr::as_tibble()), it as a nice printing showing the first rows of the data frame as well as the type of column.

library(dplyr)
iris %>% as_tibble()
## # A tibble: 150 × 5
##    Sepal.Length Sepal.Width Petal.Length Petal.Width Species
##           <dbl>       <dbl>        <dbl>       <dbl> <fct>  
##  1          5.1         3.5          1.4         0.2 setosa 
##  2          4.9         3            1.4         0.2 setosa 
##  3          4.7         3.2          1.3         0.2 setosa 
##  4          4.6         3.1          1.5         0.2 setosa 
##  5          5           3.6          1.4         0.2 setosa 
##  6          5.4         3.9          1.7         0.4 setosa 
##  7          4.6         3.4          1.4         0.3 setosa 
##  8          5           3.4          1.5         0.2 setosa 
##  9          4.4         2.9          1.4         0.2 setosa 
## 10          4.9         3.1          1.5         0.1 setosa 
## # ℹ 140 more rows

However, when you have too many variables, all of them cannot be printed and their are just listed.

data(fertility, package = "questionr")
women
## # A tibble: 2,000 × 17
##    id_woman id_household weight interview_date date_of_birth   age residency
##       <dbl>        <dbl>  <dbl> <date>         <date>        <dbl> <dbl+lbl>
##  1      391          381  1.80  2012-05-05     1997-03-07       15 2 [rural]
##  2     1643         1515  1.80  2012-01-23     1982-01-06       30 2 [rural]
##  3       85           85  1.80  2012-01-21     1979-01-01       33 2 [rural]
##  4      881          844  1.80  2012-01-06     1968-03-29       43 2 [rural]
##  5     1981         1797  1.80  2012-05-11     1986-05-25       25 2 [rural]
##  6     1072         1015  0.998 2012-02-20     1993-07-03       18 2 [rural]
##  7     1978         1794  0.998 2012-02-23     1967-01-28       45 2 [rural]
##  8     1607         1486  0.998 2012-02-20     1989-01-21       23 2 [rural]
##  9      738          711  0.192 2012-03-09     1962-07-24       49 2 [rural]
## 10     1656         1525  0.192 2012-03-15     1980-12-25       31 2 [rural]
## # ℹ 1,990 more rows
## # ℹ 10 more variables: region <dbl+lbl>, instruction <dbl+lbl>,
## #   employed <dbl+lbl>, matri <dbl+lbl>, religion <dbl+lbl>,
## #   newspaper <dbl+lbl>, radio <dbl+lbl>, tv <dbl+lbl>,
## #   ideal_nb_children <dbl+lbl>, test <dbl+lbl>

Note: in R console, value labels (if defined) are usually printed but they do not appear in a R markdown document like this vignette.

dplyr::glimpse()

The function dplyr::glimpse() allows you to have a quick look at all the variables in a data frame.

glimpse(iris)
## Rows: 150
## Columns: 5
## $ Sepal.Length <dbl> 5.1, 4.9, 4.7, 4.6, 5.0, 5.4, 4.6, 5.0, 4.4, 4.9, 5.4, 4.…
## $ Sepal.Width  <dbl> 3.5, 3.0, 3.2, 3.1, 3.6, 3.9, 3.4, 3.4, 2.9, 3.1, 3.7, 3.…
## $ Petal.Length <dbl> 1.4, 1.4, 1.3, 1.5, 1.4, 1.7, 1.4, 1.5, 1.4, 1.5, 1.5, 1.…
## $ Petal.Width  <dbl> 0.2, 0.2, 0.2, 0.2, 0.2, 0.4, 0.3, 0.2, 0.2, 0.1, 0.2, 0.…
## $ Species      <fct> setosa, setosa, setosa, setosa, setosa, setosa, setosa, s…
glimpse(women)
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ region            <dbl+lbl> 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, …
## $ instruction       <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, …
## $ employed          <dbl+lbl> 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ matri             <dbl+lbl> 0, 2, 2, 2, 1, 0, 1, 1, 2, 5, 2, 3, 0, 2, 1, 2, …
## $ religion          <dbl+lbl> 1, 3, 2, 3, 2, 2, 3, 1, 3, 3, 2, 3, 2, 2, 2, 2, …
## $ newspaper         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ radio             <dbl+lbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, …
## $ tv                <dbl+lbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ ideal_nb_children <dbl+lbl>  4,  4,  4,  4,  4,  5, 10,  5,  4,  5,  6, 10, …
## $ test              <dbl+lbl> 0, 9, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, …

It will show you the first values of each variable as well as the type of each variable. However, some important informations are not displayed:

  • variable labels, when defined;
  • value labels for labelled vectors;
  • the list of levels for factors;
  • the range of values for numerical variables.

labelled::look_for()

look_for() provided by the labelled package will print in the console a data dictionary of all variables, showing variable labels when available, the type of variable and a list of values corresponding to:

  • levels for factors;
  • value labels for labelled vectors;
  • the range of observed values in the vector otherwise (if details = "full").
library(labelled)
look_for(iris)
##  pos variable     label           col_type missing values    
##  1   Sepal.Length —               dbl      0                 
##  2   Sepal.Width  —               dbl      0                 
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
look_for(women)
##  pos variable          label                     col_type missing
##  1   id_woman          Woman Id                  dbl      0      
##  2   id_household      Household Id              dbl      0      
##  3   weight            Sample weight             dbl      0      
##  4   interview_date    Interview date            date     0      
##  5   date_of_birth     Date of birth             date     0      
##  6   age               Age at last anniversary ~ dbl      0      
##  7   residency         Urban / rural residency   dbl+lbl  0      
##                                                                  
##  8   region            Region                    dbl+lbl  0      
##                                                                  
##                                                                  
##                                                                  
##  9   instruction       Level of instruction      dbl+lbl  0      
##                                                                  
##                                                                  
##                                                                  
##  10  employed          Employed?                 dbl+lbl  7      
##                                                                  
##                                                                  
##  11  matri             Matrimonial status        dbl+lbl  0      
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  12  religion          Religion                  dbl+lbl  4      
##                                                                  
##                                                                  
##                                                                  
##                                                                  
##  13  newspaper         Read newspaper?           dbl+lbl  0      
##                                                                  
##  14  radio             Listen to radio?          dbl+lbl  0      
##                                                                  
##  15  tv                Watch TV?                 dbl+lbl  0      
##                                                                  
##  16  ideal_nb_children Ideal number of children  dbl+lbl  0      
##                                                                  
##  17  test              Ever tested for HIV?      dbl+lbl  29     
##                                                                  
##                                                                  
##  values             
##                     
##                     
##                     
##                     
##                     
##                     
##  [1] urban          
##  [2] rural          
##  [1] North          
##  [2] East           
##  [3] South          
##  [4] West           
##  [0] none           
##  [1] primary        
##  [2] secondary      
##  [3] higher         
##  [0] no             
##  [1] yes            
##  [9] missing        
##  [0] single         
##  [1] married        
##  [2] living together
##  [3] windowed       
##  [4] divorced       
##  [5] separated      
##  [1] Muslim         
##  [2] Christian      
##  [3] Protestant     
##  [4] no religion    
##  [5] other          
##  [0] no             
##  [1] yes            
##  [0] no             
##  [1] yes            
##  [0] no             
##  [1] yes            
##  [96] don't know    
##  [99] missing       
##  [0] no             
##  [1] yes            
##  [9] missing

Note that lookfor() and generate_dictionary() are synonyms of look_for() and works exactly in the same way.

If there is not enough space to print full labels in the console, they will be truncated (truncation is indicated by a ~).

Searching variables by key

When a data frame has dozens or even hundreds of variables, it could become difficult to find a specific variable. In such case, you can provide an optional list of keywords, which can be simple character strings or regular expression, to search for specific variables.

# Look for a single keyword.
look_for(iris, "petal")
##  pos variable     label           col_type missing values
##  3   Petal.Length Length of petal dbl      0             
##  4   Petal.Width  Width of Petal  dbl      0
look_for(iris, "s")
##  pos variable     label col_type missing values    
##  1   Sepal.Length —     dbl      0                 
##  2   Sepal.Width  —     dbl      0                 
##  5   Species      —     fct      0       setosa    
##                                          versicolor
##                                          virginica
# Look for with a regular expression
look_for(iris, "petal|species")
##  pos variable     label           col_type missing values    
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
look_for(iris, "s$")
##  pos variable label col_type missing values    
##  5   Species  —     fct      0       setosa    
##                                      versicolor
##                                      virginica
# Look for with several keywords
look_for(iris, "pet", "sp")
##  pos variable     label           col_type missing values    
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
# Look_for will take variable labels into account
look_for(women, "read", "level")
##  pos variable    label                col_type missing values       
##  9   instruction Level of instruction dbl+lbl  0       [0] none     
##                                                        [1] primary  
##                                                        [2] secondary
##                                                        [3] higher   
##  13  newspaper   Read newspaper?      dbl+lbl  0       [0] no       
##                                                        [1] yes

By default, look_for() will look through both variable names and variables labels. Use labels = FALSE to look only through variable names.

look_for(women, "read")
##  pos variable  label           col_type missing values 
##  13  newspaper Read newspaper? dbl+lbl  0       [0] no 
##                                                 [1] yes
look_for(women, "read", labels = FALSE)
## Nothing found. Sorry.

Similarly, the search is by default case insensitive. To make the search case sensitive, use ignore.case = FALSE.

look_for(iris, "sepal")
##  pos variable     label col_type missing values
##  1   Sepal.Length —     dbl      0             
##  2   Sepal.Width  —     dbl      0
look_for(iris, "sepal", ignore.case = FALSE)
## Nothing found. Sorry.

Level of details

If you just want to use the search feature of look_for() without computing the details of each variable, simply indicate details = "none" or details = FALSE.

look_for(women, "id", details = "none")
##  pos variable          label                   
##   1  id_woman          Woman Id                
##   2  id_household      Household Id            
##   7  residency         Urban / rural residency 
##  16  ideal_nb_children Ideal number of children

If you want more details (but can be time consuming for big data frames), indicate details = "full" or details = TRUE.

look_for(women, details = "full")
##  pos variable          label                  col_type missing unique_values
##  1   id_woman          Woman Id               dbl      0       2000         
##  2   id_household      Household Id           dbl      0       1814         
##  3   weight            Sample weight          dbl      0       351          
##  4   interview_date    Interview date         date     0       165          
##  5   date_of_birth     Date of birth          date     0       1740         
##  6   age               Age at last anniversa~ dbl      0       36           
##  7   residency         Urban / rural residen~ dbl+lbl  0       2            
##                                                                             
##  8   region            Region                 dbl+lbl  0       4            
##                                                                             
##                                                                             
##                                                                             
##  9   instruction       Level of instruction   dbl+lbl  0       4            
##                                                                             
##                                                                             
##                                                                             
##  10  employed          Employed?              dbl+lbl  7       3            
##                                                                             
##                                                                             
##  11  matri             Matrimonial status     dbl+lbl  0       6            
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  12  religion          Religion               dbl+lbl  4       6            
##                                                                             
##                                                                             
##                                                                             
##                                                                             
##  13  newspaper         Read newspaper?        dbl+lbl  0       2            
##                                                                             
##  14  radio             Listen to radio?       dbl+lbl  0       2            
##                                                                             
##  15  tv                Watch TV?              dbl+lbl  0       2            
##                                                                             
##  16  ideal_nb_children Ideal number of child~ dbl+lbl  0       18           
##                                                                             
##  17  test              Ever tested for HIV?   dbl+lbl  29      3            
##                                                                             
##                                                                             
##  values                 na_values na_range
##  range: 1 - 2000                          
##  range: 1 - 1814                          
##  range: 0.044629 - 4.3~                   
##  range: 2011-12-01 - 2~                   
##  range: 1962-02-07 - 1~                   
##  range: 14 - 49                           
##  [1] urban                                
##  [2] rural                                
##  [1] North                                
##  [2] East                                 
##  [3] South                                
##  [4] West                                 
##  [0] none                                 
##  [1] primary                              
##  [2] secondary                            
##  [3] higher                               
##  [0] no                 9                 
##  [1] yes                                  
##  [9] missing                              
##  [0] single                               
##  [1] married                              
##  [2] living together                      
##  [3] windowed                             
##  [4] divorced                             
##  [5] separated                            
##  [1] Muslim                               
##  [2] Christian                            
##  [3] Protestant                           
##  [4] no religion                          
##  [5] other                                
##  [0] no                                   
##  [1] yes                                  
##  [0] no                                   
##  [1] yes                                  
##  [0] no                                   
##  [1] yes                                  
##  [96] don't know                          
##  [99] missing                             
##  [0] no                 9                 
##  [1] yes                                  
##  [9] missing
look_for(women, details = "full") %>%
  dplyr::glimpse()
## Rows: 17
## Columns: 14
## $ pos           <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
## $ variable      <chr> "id_woman", "id_household", "weight", "interview_date", …
## $ label         <chr> "Woman Id", "Household Id", "Sample weight", "Interview …
## $ col_type      <chr> "dbl", "dbl", "dbl", "date", "date", "dbl", "dbl+lbl", "…
## $ missing       <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 4, 0, 0, 0, 0, 29
## $ levels        <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, …
## $ value_labels  <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <1, 2>, …
## $ class         <named list> "numeric", "numeric", "numeric", "Date", "Date", …
## $ type          <chr> "double", "double", "double", "double", "double",…
## $ na_values     <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <…
## $ na_range      <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, …
## $ n_na          <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 4, 0, 0, 0, 0, 29
## $ unique_values <int> 2000, 1814, 351, 165, 1740, 36, 2, 4, 4, 3, 6, 6,…
## $ range         <named list> <1, 2000>, <1, 1814>, <0.044629, 4.396831>, <2011…

Advanced usages of look_for()

look_for() returns a detailed tibble which is summarized before printing. To deactivate default printing and see full results, simply use dplyr::as_tibble(), dplyr::glimpse() or even utils::View().

look_for(women) %>% View()
look_for(women) %>% as_tibble()
## # A tibble: 17 × 7
##      pos variable          label            col_type missing levels value_labels
##    <int> <chr>             <chr>            <chr>      <int> <name> <named list>
##  1     1 id_woman          Woman Id         dbl            0 <NULL> <NULL>      
##  2     2 id_household      Household Id     dbl            0 <NULL> <NULL>      
##  3     3 weight            Sample weight    dbl            0 <NULL> <NULL>      
##  4     4 interview_date    Interview date   date           0 <NULL> <NULL>      
##  5     5 date_of_birth     Date of birth    date           0 <NULL> <NULL>      
##  6     6 age               Age at last ann… dbl            0 <NULL> <NULL>      
##  7     7 residency         Urban / rural r… dbl+lbl        0 <NULL> <dbl [2]>   
##  8     8 region            Region           dbl+lbl        0 <NULL> <dbl [4]>   
##  9     9 instruction       Level of instru… dbl+lbl        0 <NULL> <dbl [4]>   
## 10    10 employed          Employed?        dbl+lbl        7 <NULL> <dbl [3]>   
## 11    11 matri             Matrimonial sta… dbl+lbl        0 <NULL> <dbl [6]>   
## 12    12 religion          Religion         dbl+lbl        4 <NULL> <dbl [5]>   
## 13    13 newspaper         Read newspaper?  dbl+lbl        0 <NULL> <dbl [2]>   
## 14    14 radio             Listen to radio? dbl+lbl        0 <NULL> <dbl [2]>   
## 15    15 tv                Watch TV?        dbl+lbl        0 <NULL> <dbl [2]>   
## 16    16 ideal_nb_children Ideal number of… dbl+lbl        0 <NULL> <dbl [2]>   
## 17    17 test              Ever tested for… dbl+lbl       29 <NULL> <dbl [3]>
glimpse(look_for(women))
## Rows: 17
## Columns: 7
## $ pos          <int> 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15, 16, 17
## $ variable     <chr> "id_woman", "id_household", "weight", "interview_date", "…
## $ label        <chr> "Woman Id", "Household Id", "Sample weight", "Interview d…
## $ col_type     <chr> "dbl", "dbl", "dbl", "date", "date", "dbl", "dbl+lbl", "d…
## $ missing      <int> 0, 0, 0, 0, 0, 0, 0, 0, 0, 7, 0, 4, 0, 0, 0, 0, 29
## $ levels       <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <…
## $ value_labels <named list> <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <NULL>, <1, 2>, <…

The tibble returned by look_for() could be easily manipulated for advanced programming.

When a column has several values for one variable (e.g. levels or value_labels), results as stored with nested named list. You can convert named lists into simpler character vectors, you can use convert_list_columns_to_character().

look_for(women) %>% convert_list_columns_to_character()
## # A tibble: 17 × 7
##      pos variable          label            col_type missing levels value_labels
##    <int> <chr>             <chr>            <chr>      <int> <chr>  <chr>       
##  1     1 id_woman          Woman Id         dbl            0 ""     ""          
##  2     2 id_household      Household Id     dbl            0 ""     ""          
##  3     3 weight            Sample weight    dbl            0 ""     ""          
##  4     4 interview_date    Interview date   date           0 ""     ""          
##  5     5 date_of_birth     Date of birth    date           0 ""     ""          
##  6     6 age               Age at last ann… dbl            0 ""     ""          
##  7     7 residency         Urban / rural r… dbl+lbl        0 ""     "[1] urban;…
##  8     8 region            Region           dbl+lbl        0 ""     "[1] North;…
##  9     9 instruction       Level of instru… dbl+lbl        0 ""     "[0] none; …
## 10    10 employed          Employed?        dbl+lbl        7 ""     "[0] no; [1…
## 11    11 matri             Matrimonial sta… dbl+lbl        0 ""     "[0] single…
## 12    12 religion          Religion         dbl+lbl        4 ""     "[1] Muslim…
## 13    13 newspaper         Read newspaper?  dbl+lbl        0 ""     "[0] no; [1…
## 14    14 radio             Listen to radio? dbl+lbl        0 ""     "[0] no; [1…
## 15    15 tv                Watch TV?        dbl+lbl        0 ""     "[0] no; [1…
## 16    16 ideal_nb_children Ideal number of… dbl+lbl        0 ""     "[96] don't…
## 17    17 test              Ever tested for… dbl+lbl       29 ""     "[0] no; [1…

Alternatively, you can use lookfor_to_long_format() to transform results into a long format with one row per factor level and per value label.

look_for(women) %>% lookfor_to_long_format()
## # A tibble: 41 × 7
##      pos variable       label               col_type missing levels value_labels
##    <int> <chr>          <chr>               <chr>      <int> <chr>  <chr>       
##  1     1 id_woman       Woman Id            dbl            0 <NA>   <NA>        
##  2     2 id_household   Household Id        dbl            0 <NA>   <NA>        
##  3     3 weight         Sample weight       dbl            0 <NA>   <NA>        
##  4     4 interview_date Interview date      date           0 <NA>   <NA>        
##  5     5 date_of_birth  Date of birth       date           0 <NA>   <NA>        
##  6     6 age            Age at last annive… dbl            0 <NA>   <NA>        
##  7     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [1] urban   
##  8     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [2] rural   
##  9     8 region         Region              dbl+lbl        0 <NA>   [1] North   
## 10     8 region         Region              dbl+lbl        0 <NA>   [2] East    
## # ℹ 31 more rows

Both can be combined:

look_for(women) %>%
  lookfor_to_long_format() %>%
  convert_list_columns_to_character()
## # A tibble: 41 × 7
##      pos variable       label               col_type missing levels value_labels
##    <int> <chr>          <chr>               <chr>      <int> <chr>  <chr>       
##  1     1 id_woman       Woman Id            dbl            0 <NA>   <NA>        
##  2     2 id_household   Household Id        dbl            0 <NA>   <NA>        
##  3     3 weight         Sample weight       dbl            0 <NA>   <NA>        
##  4     4 interview_date Interview date      date           0 <NA>   <NA>        
##  5     5 date_of_birth  Date of birth       date           0 <NA>   <NA>        
##  6     6 age            Age at last annive… dbl            0 <NA>   <NA>        
##  7     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [1] urban   
##  8     7 residency      Urban / rural resi… dbl+lbl        0 <NA>   [2] rural   
##  9     8 region         Region              dbl+lbl        0 <NA>   [1] North   
## 10     8 region         Region              dbl+lbl        0 <NA>   [2] East    
## # ℹ 31 more rows
labelled/inst/doc/look_for.R0000644000176200001440000000440414444575737015523 0ustar liggesusers## ----message=FALSE------------------------------------------------------------ library(dplyr) ## ----------------------------------------------------------------------------- iris %>% as_tibble() ## ----------------------------------------------------------------------------- data(fertility, package = "questionr") women ## ----------------------------------------------------------------------------- glimpse(iris) glimpse(women) ## ----------------------------------------------------------------------------- library(labelled) look_for(iris) look_for(women) ## ----------------------------------------------------------------------------- # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") # Look_for will take variable labels into account look_for(women, "read", "level") ## ----------------------------------------------------------------------------- look_for(women, "read") look_for(women, "read", labels = FALSE) ## ----------------------------------------------------------------------------- look_for(iris, "sepal") look_for(iris, "sepal", ignore.case = FALSE) ## ----------------------------------------------------------------------------- look_for(women, "id", details = "none") ## ----------------------------------------------------------------------------- look_for(women, details = "full") look_for(women, details = "full") %>% dplyr::glimpse() ## ---- eval=FALSE-------------------------------------------------------------- # look_for(women) %>% View() ## ----------------------------------------------------------------------------- look_for(women) %>% as_tibble() glimpse(look_for(women)) ## ----------------------------------------------------------------------------- look_for(women) %>% convert_list_columns_to_character() ## ----------------------------------------------------------------------------- look_for(women) %>% lookfor_to_long_format() ## ----------------------------------------------------------------------------- look_for(women) %>% lookfor_to_long_format() %>% convert_list_columns_to_character() labelled/inst/doc/packed_columns.Rmd0000644000176200001440000000464014444527456017216 0ustar liggesusers--- author: "Joseph Larmarange" title: "Variables labels and packed columns" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Variables labels and packed columns} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- The **tidyr** allows to group several columns of a tibble into one single df-column, see `tidyr::pack()`. Such df-column is itself a tibble. It's not currently clear why you would ever want to pack columns since few functions work with this sort of data. ```{r} library(tidyr) d <- iris %>% as_tibble() %>% pack( Sepal = starts_with("Sepal"), Petal = starts_with("Petal"), .names_sep = "." ) str(d) class(d$Sepal) ``` Regarding variable labels, you may want to define a label for one sub-column of a df-column, or eventually a label for the df-column itself. For a sub-column, you could use easily `var_label()` to define your label. ```{r} library(labelled) var_label(d$Sepal$Length) <- "Length of the sepal" str(d) ``` But you cannot use directly `var_label()` for the df-column. ```{r} var_label(d$Petal) <- "wrong label for Petal" str(d) ``` As `d$Petal` is itself a tibble, applying `var_label()` on it would have an effect on each sub-column. To change a variable label to the df-column itself, you could use `label_attribute()`. ```{r} label_attribute(d$Petal) <- "correct label for Petal" str(d) ``` On the other hand, `set_variable_labels()` works differently, as the primary intention of this function is to work on the columns of a tibble. ```{r} d <- d %>% set_variable_labels(Sepal = "Label of the Sepal df-column") str(d) ``` This is equivalent to: ```{r} var_label(d) <- list(Sepal = "Label of the Sepal df-column") str(d) ``` To use `set_variable_labels()` on sub-columns, you should use this syntax: ```{r} d$Petal <- d$Petal %>% set_variable_labels( Length = "Petal length", Width = "Petal width" ) str(d) ``` If you want to get the list of variable labels of a tibble, by default `var_label()` or `get_variable_labels()` will return the labels of the first level of columns. ```{r} d %>% get_variable_labels() ``` To obtain the list of variable labels for sub-columns, you could use `recurse = TRUE`: ```{r} d %>% get_variable_labels(recurse = TRUE) d %>% get_variable_labels( recurse = TRUE, null_action = "fill", unlist = TRUE ) ``` labelled/inst/doc/look_for.Rmd0000644000176200001440000001150614357761455016042 0ustar liggesusers--- author: "Joseph Larmarange" title: "Generate a data dictionnary and search for variables with `look_for()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Generate a data dictionnary and search for variables with `look_for()`} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- ## Showing a summary of a data frame ### Default printing of tibbles It is a common need to easily get a description of all variables in a data frame. When a data frame is converted into a tibble (e.g. with `dplyr::as_tibble()`), it as a nice printing showing the first rows of the data frame as well as the type of column. ```{r message=FALSE} library(dplyr) ``` ```{r} iris %>% as_tibble() ``` However, when you have too many variables, all of them cannot be printed and their are just listed. ```{r} data(fertility, package = "questionr") women ``` Note: in **R** console, value labels (if defined) are usually printed but they do not appear in a R markdown document like this vignette. ### `dplyr::glimpse()` The function `dplyr::glimpse()` allows you to have a quick look at all the variables in a data frame. ```{r} glimpse(iris) glimpse(women) ``` It will show you the first values of each variable as well as the type of each variable. However, some important informations are not displayed: - variable labels, when defined; - value labels for labelled vectors; - the list of levels for factors; - the range of values for numerical variables. ### `labelled::look_for()` `look_for()` provided by the `labelled` package will print in the console a data dictionary of all variables, showing variable labels when available, the type of variable and a list of values corresponding to: - levels for factors; - value labels for labelled vectors; - the range of observed values in the vector otherwise (if `details = "full"`). ```{r} library(labelled) look_for(iris) look_for(women) ``` Note that `lookfor()` and `generate_dictionary()` are synonyms of `look_for()` and works exactly in the same way. If there is not enough space to print full labels in the console, they will be truncated (truncation is indicated by a `~`). ## Searching variables by key When a data frame has dozens or even hundreds of variables, it could become difficult to find a specific variable. In such case, you can provide an optional list of keywords, which can be simple character strings or regular expression, to search for specific variables. ```{r} # Look for a single keyword. look_for(iris, "petal") look_for(iris, "s") # Look for with a regular expression look_for(iris, "petal|species") look_for(iris, "s$") # Look for with several keywords look_for(iris, "pet", "sp") # Look_for will take variable labels into account look_for(women, "read", "level") ``` By default, `look_for()` will look through both variable names and variables labels. Use `labels = FALSE` to look only through variable names. ```{r} look_for(women, "read") look_for(women, "read", labels = FALSE) ``` Similarly, the search is by default case insensitive. To make the search case sensitive, use `ignore.case = FALSE`. ```{r} look_for(iris, "sepal") look_for(iris, "sepal", ignore.case = FALSE) ``` ## Level of details If you just want to use the search feature of `look_for()` without computing the details of each variable, simply indicate `details = "none"` or `details = FALSE`. ```{r} look_for(women, "id", details = "none") ``` If you want more details (but can be time consuming for big data frames), indicate `details = "full"` or `details = TRUE`. ```{r} look_for(women, details = "full") look_for(women, details = "full") %>% dplyr::glimpse() ``` ## Advanced usages of `look_for()` `look_for()` returns a detailed tibble which is summarized before printing. To deactivate default printing and see full results, simply use `dplyr::as_tibble()`, `dplyr::glimpse()` or even `utils::View()`. ```{r, eval=FALSE} look_for(women) %>% View() ``` ```{r} look_for(women) %>% as_tibble() glimpse(look_for(women)) ``` The tibble returned by `look_for()` could be easily manipulated for advanced programming. When a column has several values for one variable (e.g. `levels` or `value_labels`), results as stored with nested named list. You can convert named lists into simpler character vectors, you can use `convert_list_columns_to_character()`. ```{r} look_for(women) %>% convert_list_columns_to_character() ``` Alternatively, you can use `lookfor_to_long_format()` to transform results into a long format with one row per factor level and per value label. ```{r} look_for(women) %>% lookfor_to_long_format() ``` Both can be combined: ```{r} look_for(women) %>% lookfor_to_long_format() %>% convert_list_columns_to_character() ``` labelled/inst/doc/missing_values.Rmd0000644000176200001440000001445514357761455017266 0ustar liggesusers--- author: "Joseph Larmarange" title: "About missing values: regular NAs, tagged NAs and user NAs" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{About missing values: regular NAs, tagged NAs and user NAs} %\VignetteEngine{knitr::rmarkdown} \usepackage[utf8]{inputenc} --- In base **R**, missing values are indicated using the specific value `NA`. **Regular NAs** could be used with any type of vector (double, integer, character, factor, Date, etc.). Other statistical software have implemented ways to differentiate several types of missing values. **Stata** and **SAS** have a system of **tagged NAs**, where NA values are tagged with a letter (from a to z). **SPSS** allows users to indicate that certain non-missing values should be treated in some analysis as missing (**user NAs**). The `haven` package implements **tagged NAs** and **user NAs** in order to keep this information when importing files from **Stata**, **SAS** or **SPSS**. ```{r} library(labelled) ``` ## Tagged NAs ### Creation and tests **Tagged NAs** are proper `NA` values with a tag attached to them. They can be created with `tagged_na()`. The attached tag should be a single letter, lowercase (a-z) or uppercase (A-Z). ```{r} x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) ``` For most **R** functions, tagged NAs are just considered as regular NAs. By default, they are just printed as any other regular NA. ```{r} x is.na(x) ``` To show/print their tags, you need to use `na_tag()`, `print_tagged_na()` or `format_tagged_na()`. ```{r} na_tag(x) print_tagged_na(x) format_tagged_na(x) ``` To test if a certain NA is a regular NA or a tagged NA, you should use `is_regular_na()` or `is_tagged_na()`. ```{r} is.na(x) is_tagged_na(x) # You can test for specific tagged NAs with the second argument is_tagged_na(x, "a") is_regular_na(x) ``` Tagged NAs could be defined **only** for double vectors. If you add a tagged NA to a character vector, it will be converted into a regular NA. If you add a tagged NA to an integer vector, the vector will be converted into a double vector. ```{r, error=TRUE} y <- c("a", "b", tagged_na("z")) y is_tagged_na(y) format_tagged_na(y) z <- c(1L, 2L, tagged_na("a")) typeof(z) format_tagged_na(z) ``` ### Unique values, duplicates and sorting with tagged NAs By default, functions such as `base::unique()`, `base::duplicated()`, `base::order()` or `base::sort()` will treat tagged NAs as the same thing as a regular NA. You can use `unique_tagged_na()`, `duplicated_tagged_na()`, `order_tagged_na()` and `sort_tagged_na()` as alternatives that will treat two tagged NAs with different tags as separate values. ```{r} x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x %>% print_tagged_na() unique(x) %>% print_tagged_na() unique_tagged_na(x) %>% print_tagged_na() duplicated(x) duplicated_tagged_na(x) sort(x, na.last = TRUE) %>% print_tagged_na() sort_tagged_na(x) %>% print_tagged_na() ``` ### Tagged NAs and value labels It is possible to define value labels for tagged NAs. ```{r} x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) x ``` When converting such labelled vector into factor, tagged NAs are, by default, converted into regular NAs (it is not possible to define tagged NAs with factors). ```{r} to_factor(x) ``` However, the option `explicit_tagged_na` of `to_factor()` allows to transform tagged NAs into explicit factor levels. ```{r} to_factor(x, explicit_tagged_na = TRUE) to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE) ``` ### Conversion into user NAs Tagged NAs can be converted into user NAs with `tagged_na_to_user_na()`. ```{r} tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) ``` Use `tagged_na_to_regular_na()` to convert tagged NAs into regular NAs. ```{r} tagged_na_to_regular_na(x) tagged_na_to_regular_na(x) %>% is_tagged_na() ``` ## User NAs `haven` introduced an `haven_labelled_spss` class to deal with user defined missing values in a similar way as **SPSS**. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal `NA` values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into `NA` if required before analysis. These defined missing values could co-exist with internal `NA` values. ### Creation User NAs could be created directly with `labelled_spss()`. You can also manipulate them with `na_values()` and `na_range()`. ```{r} v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 v na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ``` NB: you cant also use `set_na_range()` and `set_na_values()` for a `dplyr`-like syntax. ```{r} library(dplyr) # setting value labels and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% set_value_labels(s2 = c(yes = 1, no = 2)) %>% set_na_values(s2 = 9) df$s2 # removing user NAs df <- df %>% set_na_values(s2 = NULL) df$s2 ``` ### Tests Note that `is.na()` will return `TRUE` for user NAs. Use `is_user_na()` to test if a specific value is a user NA and `is_regular_na()` to test if it is a regular NA. ```{r} v is.na(v) is_user_na(v) is_regular_na(v) ``` ### Conversion For most **R** functions, user NAs values are **still** regular values. ```{r} x <- c(1:5, 11:15) na_range(x) <- c(10, Inf) val_labels(x) <- c("dk" = 11, "refused" = 15) x mean(x) ``` You can convert user NAs into regular NAs with `user_na_to_na()` or `user_na_to_regular_na()` (both functions are identical). ```{r} user_na_to_na(x) mean(user_na_to_na(x), na.rm = TRUE) ``` Alternatively, if the vector is numeric, you can convert user NAs into tagged NAs with `user_na_to_tagged_na()`. ```{r} user_na_to_tagged_na(x) mean(user_na_to_tagged_na(x), na.rm = TRUE) ``` Finally, you can also remove user NAs definition without converting these values to `NA`, using `remove_user_na()`. ```{r} remove_user_na(x) mean(remove_user_na(x)) ``` labelled/inst/doc/missing_values.R0000644000176200001440000000670014444575740016734 0ustar liggesusers## ----------------------------------------------------------------------------- library(labelled) ## ----------------------------------------------------------------------------- x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) ## ----------------------------------------------------------------------------- x is.na(x) ## ----------------------------------------------------------------------------- na_tag(x) print_tagged_na(x) format_tagged_na(x) ## ----------------------------------------------------------------------------- is.na(x) is_tagged_na(x) # You can test for specific tagged NAs with the second argument is_tagged_na(x, "a") is_regular_na(x) ## ---- error=TRUE-------------------------------------------------------------- y <- c("a", "b", tagged_na("z")) y is_tagged_na(y) format_tagged_na(y) z <- c(1L, 2L, tagged_na("a")) typeof(z) format_tagged_na(z) ## ----------------------------------------------------------------------------- x <- c(1, 2, tagged_na("a"), 1, tagged_na("z"), 2, tagged_na("a"), NA) x %>% print_tagged_na() unique(x) %>% print_tagged_na() unique_tagged_na(x) %>% print_tagged_na() duplicated(x) duplicated_tagged_na(x) sort(x, na.last = TRUE) %>% print_tagged_na() sort_tagged_na(x) %>% print_tagged_na() ## ----------------------------------------------------------------------------- x <- c(1, 0, 1, tagged_na("r"), 0, tagged_na("d"), tagged_na("z"), NA) val_labels(x) <- c( no = 0, yes = 1, "don't know" = tagged_na("d"), refusal = tagged_na("r") ) x ## ----------------------------------------------------------------------------- to_factor(x) ## ----------------------------------------------------------------------------- to_factor(x, explicit_tagged_na = TRUE) to_factor(x, levels = "prefixed", explicit_tagged_na = TRUE) ## ----------------------------------------------------------------------------- tagged_na_to_user_na(x) tagged_na_to_user_na(x, user_na_start = 10) ## ----------------------------------------------------------------------------- tagged_na_to_regular_na(x) tagged_na_to_regular_na(x) %>% is_tagged_na() ## ----------------------------------------------------------------------------- v <- labelled(c(1, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, no = 3, "don't know" = 9)) v na_values(v) <- 9 v na_values(v) <- NULL v na_range(v) <- c(5, Inf) na_range(v) v ## ----------------------------------------------------------------------------- library(dplyr) # setting value labels and user NAs df <- tibble(s1 = c("M", "M", "F", "F"), s2 = c(1, 1, 2, 9)) %>% set_value_labels(s2 = c(yes = 1, no = 2)) %>% set_na_values(s2 = 9) df$s2 # removing user NAs df <- df %>% set_na_values(s2 = NULL) df$s2 ## ----------------------------------------------------------------------------- v is.na(v) is_user_na(v) is_regular_na(v) ## ----------------------------------------------------------------------------- x <- c(1:5, 11:15) na_range(x) <- c(10, Inf) val_labels(x) <- c("dk" = 11, "refused" = 15) x mean(x) ## ----------------------------------------------------------------------------- user_na_to_na(x) mean(user_na_to_na(x), na.rm = TRUE) ## ----------------------------------------------------------------------------- user_na_to_tagged_na(x) mean(user_na_to_tagged_na(x), na.rm = TRUE) ## ----------------------------------------------------------------------------- remove_user_na(x) mean(remove_user_na(x)) labelled/inst/doc/intro_labelled.html0000644000176200001440000050560414444575736017442 0ustar liggesusers Introduction to labelled

Introduction to labelled

Joseph Larmarange

The purpose of the labelled package is to provide functions to manipulate metadata as variable labels, value labels and defined missing values using the haven_labelled and haven_labelled_spss classes introduced in haven package.

These classes allow to add metadata (variable, value labels and SPSS-style missing values) to vectors.

It should be noted that value labels doesn’t imply that your vectors should be considered as categorical or continuous. Therefore, value labels are not intended to be use for data analysis. For example, before performing modeling, you should convert vectors with value labels into factors or into classic numeric/character vectors.

Therefore, two main approaches could be considered.

Two main approaches
Two main approaches

In approach A, haven_labelled vectors are converted into factors or into numeric/character vectors just after data import, using unlabelled(), to_factor() or unclass(). Then, data cleaning, recoding and analysis are performed using classic R vector types.

In approach B, haven_labelled vectors are kept for data cleaning and coding, allowing to preserved original recoding, in particular if data should be reexported after that step. Functions provided by labelled will be useful for managing value labels. However, as in approach A, haven_labelled vectors will have to be converted into classic factors or numeric vectors before data analysis (in particular modeling) as this is the way categorical and continuous variables should be coded for analysis functions.

Variable labels

A variable label could be specified for any vector using var_label().

library(labelled)

var_label(iris$Sepal.Length) <- "Length of sepal"

It’s possible to add a variable label to several columns of a data frame using a named list.

var_label(iris) <- list(
  Petal.Length = "Length of petal",
  Petal.Width = "Width of Petal"
)

To get the variable label, simply call var_label().

var_label(iris$Petal.Width)
## [1] "Width of Petal"
var_label(iris)
## $Sepal.Length
## [1] "Length of sepal"
## 
## $Sepal.Width
## NULL
## 
## $Petal.Length
## [1] "Length of petal"
## 
## $Petal.Width
## [1] "Width of Petal"
## 
## $Species
## NULL

To remove a variable label, use NULL.

var_label(iris$Sepal.Length) <- NULL

In RStudio, variable labels will be displayed in data viewer.

View(iris)

You can display and search through variable names and labels with look_for():

look_for(iris)
##  pos variable     label           col_type missing values    
##  1   Sepal.Length —               dbl      0                 
##  2   Sepal.Width  —               dbl      0                 
##  3   Petal.Length Length of petal dbl      0                 
##  4   Petal.Width  Width of Petal  dbl      0                 
##  5   Species      —               fct      0       setosa    
##                                                    versicolor
##                                                    virginica
look_for(iris, "pet")
##  pos variable     label           col_type missing values
##  3   Petal.Length Length of petal dbl      0             
##  4   Petal.Width  Width of Petal  dbl      0
look_for(iris, details = FALSE)
##  pos variable     label          
##  1   Sepal.Length —              
##  2   Sepal.Width  —              
##  3   Petal.Length Length of petal
##  4   Petal.Width  Width of Petal 
##  5   Species      —

Value labels

The first way to create a labelled vector is to use the labelled() function. It’s not mandatory to provide a label for each value observed in your vector. You can also provide a label for values not observed.

v <- labelled(
  c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
  c(yes = 1, no = 3, "don't know" = 8, refused = 9)
)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused

Use val_labels() to get all value labels and val_label() to get the value label associated with a specific value.

val_labels(v)
##        yes         no don't know    refused 
##          1          3          8          9
val_label(v, 8)
## [1] "don't know"

val_labels() could also be used to modify all the value labels attached to a vector, while val_label() will update only one specific value label.

val_labels(v) <- c(yes = 1, nno = 3, bug = 5)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      3   nno
##      5   bug
val_label(v, 3) <- "no"
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      3    no
##      5   bug

With val_label(), you can also add or remove specific value labels.

val_label(v, 2) <- "maybe"
val_label(v, 5) <- NULL
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      3    no
##      2 maybe

To remove all value labels, use val_labels() and NULL. The haven_labelled class will also be removed.

val_labels(v) <- NULL
v
##  [1]  1  2  2  2  3  9  1  3  2 NA

Adding a value label to a non labelled vector will apply haven_labelled class to it.

val_label(v, 1) <- "yes"
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes

Note that applying val_labels() to a factor will generate an error!

f <- factor(1:3)
f
## [1] 1 2 3
## Levels: 1 2 3
val_labels(f) <- c(yes = 1, no = 3)
## Error in `val_labels<-.factor`(`*tmp*`, value = c(yes = 1, no = 3)): Value labels cannot be applied to factors.

You could also apply val_labels() to several columns of a data frame.

df <- data.frame(v1 = 1:3, v2 = c(2, 3, 1), v3 = 3:1)

val_label(df, 1) <- "yes"
val_label(df[, c("v1", "v3")], 2) <- "maybe"
val_label(df[, c("v2", "v3")], 3) <- "no"
val_labels(df)
## $v1
##   yes maybe 
##     1     2 
## 
## $v2
## yes  no 
##   1   3 
## 
## $v3
##   yes maybe    no 
##     1     2     3
val_labels(df[, c("v1", "v3")]) <- c(YES = 1, MAYBE = 2, NO = 3)
val_labels(df)
## $v1
##   YES MAYBE    NO 
##     1     2     3 
## 
## $v2
## yes  no 
##   1   3 
## 
## $v3
##   YES MAYBE    NO 
##     1     2     3
val_labels(df) <- NULL
val_labels(df)
## $v1
## NULL
## 
## $v2
## NULL
## 
## $v3
## NULL
val_labels(df) <- list(v1 = c(yes = 1, no = 3), v2 = c(a = 1, b = 2, c = 3))
val_labels(df)
## $v1
## yes  no 
##   1   3 
## 
## $v2
## a b c 
## 1 2 3 
## 
## $v3
## NULL

Sorting value labels

Value labels are sorted by default in the order they have been created.

v <- c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA)
val_label(v, 1) <- "yes"
val_label(v, 3) <- "no"
val_label(v, 9) <- "refused"
val_label(v, 2) <- "maybe"
val_label(v, 8) <- "don't know"
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9    refused
##      2      maybe
##      8 don't know

It could be useful to reorder the value labels according to their attached values, with sort_val_labels().

sort_val_labels(v)
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      2      maybe
##      3         no
##      8 don't know
##      9    refused
sort_val_labels(v, decreasing = TRUE)
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      9    refused
##      8 don't know
##      3         no
##      2      maybe
##      1        yes

If you prefer, you can also sort them according to the labels.

sort_val_labels(v, according_to = "l")
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      8 don't know
##      2      maybe
##      3         no
##      9    refused
##      1        yes

User defined missing values (SPSS’s style)

haven (>= 2.0.0) introduced an additional haven_labelled_spss class to deal with user defined missing values. In such case, additional attributes will be used to indicate with values should be considered as missing, but such values will not be stored as internal NA values. You should note that most R function will not take this information into account. Therefore, you will have to convert missing values into NA if required before analysis. These defined missing values could co-exist with internal NA values.

It is possible to manipulate this missing values with na_values() and na_range(). Note that is.na() will return TRUE as well for user-defined missing values.

v <- labelled(
  c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
  c(yes = 1, no = 3, "don't know" = 9)
)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_values(v) <- 9
na_values(v)
## [1] 9
v
## <labelled_spss<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## Missing values: 9
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
is.na(v)
##  [1] FALSE FALSE FALSE FALSE FALSE  TRUE FALSE FALSE FALSE  TRUE
na_values(v) <- NULL
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know
na_range(v) <- c(5, Inf)
na_range(v)
## [1]   5 Inf
v
## <labelled_spss<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## Missing range:  [5, Inf]
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      9 don't know

Since version 2.1.0, it is not mandatory to define at least one value label before defining missing values.

x <- c(1, 2, 2, 9)
na_values(x) <- 9
x
## <labelled_spss<double>[4]>
## [1] 1 2 2 9
## Missing values: 9

To convert user defined missing values into NA, simply use user_na_to_na().

v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
v
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## Missing values: 9, 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad
v2 <- user_na_to_na(v)
v2
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8 NA NA
## 
## Labels:
##  value label
##      1  Good
##      8   Bad

You can also remove user missing values definition without converting these values to NA.

v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
v
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## Missing values: 9, 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad
v2 <- remove_user_na(v)
v2
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad

or

v <- labelled_spss(1:10, c(Good = 1, Bad = 8), na_values = c(9, 10))
v
## <labelled_spss<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## Missing values: 9, 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad
na_values(v) <- NULL
v
## <labelled<integer>[10]>
##  [1]  1  2  3  4  5  6  7  8  9 10
## 
## Labels:
##  value label
##      1  Good
##      8   Bad

Other conversion to NA

In some cases, values who don’t have an attached value label could be considered as missing. nolabel_to_na() will convert them to NA.

v <- labelled(c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA), c(yes = 1, maybe = 2, no = 3))
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      2 maybe
##      3    no
nolabel_to_na(v)
## <labelled<double>[10]>
##  [1]  1  2  2  2  3 NA  1  3  2 NA
## 
## Labels:
##  value label
##      1   yes
##      2 maybe
##      3    no

In other cases, a value label is attached only to specific values that corresponds to a missing value. For example:

size <- labelled(c(1.88, 1.62, 1.78, 99, 1.91), c("not measured" = 99))
size
## <labelled<double>[5]>
## [1]  1.88  1.62  1.78 99.00  1.91
## 
## Labels:
##  value        label
##     99 not measured

In such cases, val_labels_to_na() could be appropriate.

val_labels_to_na(size)
## [1] 1.88 1.62 1.78   NA 1.91

These two functions could also be applied to an overall data frame. Only labelled vectors will be impacted.

Converting to factor

A labelled vector could easily be converted to a factor with to_factor().

v <- labelled(
  c(1, 2, 2, 2, 3, 9, 1, 3, 2, NA),
  c(yes = 1, no = 3, "don't know" = 8, refused = 9)
)
v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused
to_factor(v)
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes 2 no don't know refused

The levels argument allows to specify what should be used as the factor levels, i.e. the labels (default), the values or the labels prefixed with values.

to_factor(v, levels = "v")
##  [1] 1    2    2    2    3    9    1    3    2    <NA>
## Levels: 1 2 3 8 9
to_factor(v, levels = "p")
##  [1] [1] yes     [2] 2       [2] 2       [2] 2       [3] no      [9] refused
##  [7] [1] yes     [3] no      [2] 2       <NA>       
## Levels: [1] yes [2] 2 [3] no [8] don't know [9] refused

The ordered argument will create an ordinal factor.

to_factor(v, ordered = TRUE)
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes < 2 < no < don't know < refused

The argument nolabel_to_na specify if the corresponding function should be applied before converting to a factor. Therefore, the two following commands are equivalent.

to_factor(v, nolabel_to_na = TRUE)
##  [1] yes     <NA>    <NA>    <NA>    no      refused yes     no      <NA>   
## [10] <NA>   
## Levels: yes no don't know refused
to_factor(nolabel_to_na(v))
##  [1] yes     <NA>    <NA>    <NA>    no      refused yes     no      <NA>   
## [10] <NA>   
## Levels: yes no don't know refused

sort_levels specifies how the levels should be sorted: "none" to keep the order in which value labels have been defined, "values" to order the levels according to the values and "labels" according to the labels. "auto" (default) will be equivalent to "none" except if some values with no attached labels are found and are not dropped. In that case, "values" will be used.

to_factor(v, sort_levels = "n")
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes no don't know refused 2
to_factor(v, sort_levels = "v")
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: yes 2 no don't know refused
to_factor(v, sort_levels = "l")
##  [1] yes     2       2       2       no      refused yes     no      2      
## [10] <NA>   
## Levels: 2 don't know no refused yes

The function to_labelled() could be used to turn a factor into a labelled numeric vector.

f <- factor(1:3, labels = c("a", "b", "c"))
to_labelled(f)
## <labelled<double>[3]>
## [1] 1 2 3
## 
## Labels:
##  value label
##      1     a
##      2     b
##      3     c

Note that to_labelled(to_factor(v)) will not be equal to v due to the way factors are stored internally by R.

v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused
to_labelled(to_factor(v))
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  5  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      2          2
##      3         no
##      4 don't know
##      5    refused

Other type of conversions

You can use to_character() for converting into a character vector instead of a factor.

v
## <labelled<double>[10]>
##  [1]  1  2  2  2  3  9  1  3  2 NA
## 
## Labels:
##  value      label
##      1        yes
##      3         no
##      8 don't know
##      9    refused
to_character(v)
##  [1] "yes"     "2"       "2"       "2"       "no"      "refused" "yes"    
##  [8] "no"      "2"       NA

To remove the haven_class, you can simply use unclass().

unclass(v)
##  [1]  1  2  2  2  3  9  1  3  2 NA
## attr(,"labels")
##        yes         no don't know    refused 
##          1          3          8          9

Note that value labels will be preserved as an attribute to the vector.

remove_val_labels(v)
##  [1]  1  2  2  2  3  9  1  3  2 NA

To remove value labels, use remove_val_labels().

remove_val_labels(v)
##  [1]  1  2  2  2  3  9  1  3  2 NA

Note that if your vector does have user-defined missing values, you may also want to use remove_user_na().

x <- c(1, 2, 2, 9)
na_values(x) <- 9
val_labels(x) <- c(yes = 1, no = 2)
var_label(x) <- "A test variable"
x
## <labelled_spss<double>[4]>: A test variable
## [1] 1 2 2 9
## Missing values: 9
## 
## Labels:
##  value label
##      1   yes
##      2    no
remove_val_labels(x)
## <labelled_spss<double>[4]>: A test variable
## [1] 1 2 2 9
## Missing values: 9
remove_user_na(x)
## <labelled<double>[4]>: A test variable
## [1] 1 2 2 9
## 
## Labels:
##  value label
##      1   yes
##      2    no
remove_user_na(x, user_na_to_na = TRUE)
## <labelled<double>[4]>: A test variable
## [1]  1  2  2 NA
## 
## Labels:
##  value label
##      1   yes
##      2    no
remove_val_labels(remove_user_na(x))
## [1] 1 2 2 9
## attr(,"label")
## [1] "A test variable"
unclass(x)
## [1] 1 2 2 9
## attr(,"labels")
## yes  no 
##   1   2 
## attr(,"na_values")
## [1] 9
## attr(,"label")
## [1] "A test variable"

You can remove all labels and user-defined missing values with remove_labels(). Use keep_var_label = TRUE to preserve only variable label.

remove_labels(x, user_na_to_na = TRUE)
## [1]  1  2  2 NA
remove_labels(x, user_na_to_na = TRUE, keep_var_label = TRUE)
## [1]  1  2  2 NA
## attr(,"label")
## [1] "A test variable"

Conditional conversion to factors

For any analysis, it is the responsibility of user to identify which labelled numeric vectors should be considered as categorical (and therefore converted into factors using to_factor()) and which variables should be treated as continuous (and therefore unclassed into numeric using base::unclass()).

It should be noted that most functions expect categorical variables to be coded as factors. It includes most modeling functions (such as stats::lm() or stats::glm()) or plotting functions from ggplot2.

In most of cases, if data documentation was properly done, categorical variables corresponds to vectors where all observed values have a value label while vectors where only few values have a value label should be considered as continuous.

In that situation, you could apply the unlabelled() method to an overall data frame. By default, unlabelled() works as follow:

  • if a column doesn’t inherit the haven_labelled class, it will be not affected;
  • if all observed values have a corresponding value label, the column will be converted into a factor (using to_factor());
  • otherwise, the column will be unclassed (and converted back to a numeric or character vector by applying base::unclass()).
df <- data.frame(
  a = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2)),
  b = labelled(c(1, 1, 2, 3), labels = c(No = 1, Yes = 2, DK = 3)),
  c = labelled(c(1, 1, 2, 2), labels = c(No = 1, Yes = 2, DK = 3)),
  d = labelled(c("a", "a", "b", "c"), labels = c(No = "a", Yes = "b")),
  e = labelled_spss(
    c(1, 9, 1, 2),
    labels = c(No = 1, Yes = 2),
    na_values = 9
    )
)
df %>% look_for()
##  pos variable label col_type missing values 
##  1   a        —     dbl+lbl  0       [1] No 
##                                      [2] Yes
##  2   b        —     dbl+lbl  0       [1] No 
##                                      [2] Yes
##                                      [3] DK 
##  3   c        —     dbl+lbl  0       [1] No 
##                                      [2] Yes
##                                      [3] DK 
##  4   d        —     chr+lbl  0       [a] No 
##                                      [b] Yes
##  5   e        —     dbl+lbl  1       [1] No 
##                                      [2] Yes
unlabelled(df) %>% look_for()
##  pos variable label col_type missing values
##  1   a        —     dbl      0             
##  2   b        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  3   c        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  4   d        —     chr      0             
##  5   e        —     fct      1       No    
##                                      Yes
unlabelled(df, user_na_to_na = TRUE) %>% look_for()
##  pos variable label col_type missing values
##  1   a        —     dbl      0             
##  2   b        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  3   c        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  4   d        —     chr      0             
##  5   e        —     fct      1       No    
##                                      Yes
unlabelled(df, drop_unused_labels = TRUE) %>% look_for()
##  pos variable label col_type missing values
##  1   a        —     dbl      0             
##  2   b        —     fct      0       No    
##                                      Yes   
##                                      DK    
##  3   c        —     fct      0       No    
##                                      Yes   
##  4   d        —     chr      0             
##  5   e        —     fct      1       No    
##                                      Yes

Importing labelled data

In haven package, read_spss, read_stata and read_sas are natively importing data using the labelled class and the label attribute for variable labels.

Functions from foreign package could also import some metadata from SPSS and Stata files. to_labelled can convert data imported with foreign into a labelled data frame. However, there are some limitations compared to using haven:

  • For SPSS files, it will be better to set use.value.labels = FALSE, to.data.frame = FALSE and use.missings = FALSE when calling read.spss. If use.value.labels = TRUE, variable with value labels will be converted into factors by read.spss (and kept as factors by foreign_to_label). If to.data.frame = TRUE, meta data describing the missing values will not be imported. If use.missings = TRUE, missing values would have been converted to NA by read.spss.
  • For Stata files, set convert.factors = FALSE when calling read.dta to avoid conversion of variables with value labels into factors. So far, missing values defined in Stata are always imported as NA by read.dta and could not be retrieved by foreign_to_labelled.

The memisc package provide functions to import variable metadata and store them in specific object of class data.set. The to_labelled method can convert a data.set into a labelled data frame.

  # from foreign
  library(foreign)
  df <- to_labelled(read.spss(
    "file.sav",
    to.data.frame = FALSE,
    use.value.labels = FALSE,
    use.missings = FALSE
 ))
 df <- to_labelled(read.dta(
   "file.dta",
   convert.factors = FALSE
 ))

 # from memisc
 library(memisc)
 nes1948.por <- UnZip("anes/NES1948.ZIP", "NES1948.POR", package = "memisc")
 nes1948 <- spss.portable.file(nes1948.por)
 df <- to_labelled(nes1948)
 ds <- as.data.set(nes19480)
 df <- to_labelled(ds)

Using labelled with dplyr/magrittr

If you are using the %>% operator, you can use the functions set_variable_labels(), set_value_labels(), add_value_labels() and remove_value_labels().

library(dplyr)
## 
## Attachement du package : 'dplyr'
## Les objets suivants sont masqués depuis 'package:stats':
## 
##     filter, lag
## Les objets suivants sont masqués depuis 'package:base':
## 
##     intersect, setdiff, setequal, union
df <- data_frame(s1 = c("M", "M", "F"), s2 = c(1, 1, 2)) %>%
  set_variable_labels(s1 = "Sex", s2 = "Question") %>%
  set_value_labels(s1 = c(Male = "M", Female = "F"), s2 = c(Yes = 1, No = 2))
## Warning: `data_frame()` was deprecated in tibble 1.1.0.
## ℹ Please use `tibble()` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value label
##      1   Yes
##      2    No

set_value_labels() will replace the list of value labels while add_value_labels() will update it.

df <- df %>%
  set_value_labels(s2 = c(Yes = 1, "Don't know" = 8, Unknown = 9))
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value      label
##      1        Yes
##      8 Don't know
##      9    Unknown
df <- df %>%
  add_value_labels(s2 = c(No = 2))
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value      label
##      1        Yes
##      8 Don't know
##      9    Unknown
##      2         No

You can also remove some variable and/or value labels.

df <- df %>%
  set_variable_labels(s1 = NULL)

# removing one value label
df <- df %>%
  remove_value_labels(s2 = 2)
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value      label
##      1        Yes
##      8 Don't know
##      9    Unknown
# removing several value labels
df <- df %>%
  remove_value_labels(s2 = 8:9)
df$s2
## <labelled<double>[3]>: Question
## [1] 1 1 2
## 
## Labels:
##  value label
##      1   Yes
# removing all value labels
df <- df %>%
  set_value_labels(s2 = NULL)
df$s2
## [1] 1 1 2
## attr(,"label")
## [1] "Question"

To convert variables, the easiest is to use unlabelled().

library(questionr)
data(fertility)
glimpse(women)
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ region            <dbl+lbl> 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, …
## $ instruction       <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, …
## $ employed          <dbl+lbl> 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, …
## $ matri             <dbl+lbl> 0, 2, 2, 2, 1, 0, 1, 1, 2, 5, 2, 3, 0, 2, 1, 2, …
## $ religion          <dbl+lbl> 1, 3, 2, 3, 2, 2, 3, 1, 3, 3, 2, 3, 2, 2, 2, 2, …
## $ newspaper         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ radio             <dbl+lbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, …
## $ tv                <dbl+lbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ ideal_nb_children <dbl+lbl>  4,  4,  4,  4,  4,  5, 10,  5,  4,  5,  6, 10, …
## $ test              <dbl+lbl> 0, 9, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, …
glimpse(women %>% unlabelled())
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <fct> rural, rural, rural, rural, rural, rural, rural, rur…
## $ region            <fct> West, West, West, West, West, South, South, South, S…
## $ instruction       <fct> none, none, none, none, primary, none, none, none, n…
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
## $ radio             <fct> no, yes, yes, no, no, yes, yes, no, no, no, yes, yes…
## $ tv                <fct> no, no, no, no, no, yes, no, no, no, no, yes, yes, n…
## $ ideal_nb_children <dbl> 4, 4, 4, 4, 4, 5, 10, 5, 4, 5, 6, 10, 2, 6, 6, 6, 4,…
## $ test              <fct> no, missing, no, no, yes, no, no, no, no, yes, yes, …

Alternatively, you can use functions as dplyr::mutate_if() or dplyr::mutate_at(). See the example below.

glimpse(to_factor(women))
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <fct> rural, rural, rural, rural, rural, rural, rural, rur…
## $ region            <fct> West, West, West, West, West, South, South, South, S…
## $ instruction       <fct> none, none, none, none, primary, none, none, none, n…
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
## $ radio             <fct> no, yes, yes, no, no, yes, yes, no, no, no, yes, yes…
## $ tv                <fct> no, no, no, no, no, yes, no, no, no, no, yes, yes, n…
## $ ideal_nb_children <fct> 4, 4, 4, 4, 4, 5, 10, 5, 4, 5, 6, 10, 2, 6, 6, 6, 4,…
## $ test              <fct> no, missing, no, no, yes, no, no, no, no, yes, yes, …
glimpse(women %>% mutate_if(is.labelled, to_factor))
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <fct> rural, rural, rural, rural, rural, rural, rural, rur…
## $ region            <fct> West, West, West, West, West, South, South, South, S…
## $ instruction       <fct> none, none, none, none, primary, none, none, none, n…
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <fct> no, no, no, no, no, no, no, no, no, no, no, no, no, …
## $ radio             <fct> no, yes, yes, no, no, yes, yes, no, no, no, yes, yes…
## $ tv                <fct> no, no, no, no, no, yes, no, no, no, no, yes, yes, n…
## $ ideal_nb_children <fct> 4, 4, 4, 4, 4, 5, 10, 5, 4, 5, 6, 10, 2, 6, 6, 6, 4,…
## $ test              <fct> no, missing, no, no, yes, no, no, no, no, yes, yes, …
glimpse(women %>% mutate_at(vars(employed:religion), to_factor))
## Rows: 2,000
## Columns: 17
## $ id_woman          <dbl> 391, 1643, 85, 881, 1981, 1072, 1978, 1607, 738, 165…
## $ id_household      <dbl> 381, 1515, 85, 844, 1797, 1015, 1794, 1486, 711, 152…
## $ weight            <dbl> 1.803150, 1.803150, 1.803150, 1.803150, 1.803150, 0.…
## $ interview_date    <date> 2012-05-05, 2012-01-23, 2012-01-21, 2012-01-06, 201…
## $ date_of_birth     <date> 1997-03-07, 1982-01-06, 1979-01-01, 1968-03-29, 198…
## $ age               <dbl> 15, 30, 33, 43, 25, 18, 45, 23, 49, 31, 26, 45, 25, …
## $ residency         <dbl+lbl> 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, …
## $ region            <dbl+lbl> 4, 4, 4, 4, 4, 3, 3, 3, 3, 3, 3, 3, 2, 2, 2, 2, …
## $ instruction       <dbl+lbl> 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 1, 1, 2, 1, 0, …
## $ employed          <fct> yes, yes, no, yes, yes, no, yes, no, yes, yes, yes, …
## $ matri             <fct> single, living together, living together, living tog…
## $ religion          <fct> Muslim, Protestant, Christian, Protestant, Christian…
## $ newspaper         <dbl+lbl> 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, …
## $ radio             <dbl+lbl> 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1, 1, 1, 1, 1, 0, …
## $ tv                <dbl+lbl> 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 0, 0, …
## $ ideal_nb_children <dbl+lbl>  4,  4,  4,  4,  4,  5, 10,  5,  4,  5,  6, 10, …
## $ test              <dbl+lbl> 0, 9, 0, 0, 1, 0, 0, 0, 0, 1, 1, 0, 0, 1, 1, 0, …
labelled/inst/WORDLIST0000644000176200001440000000041614444546070014176 0ustar liggesusersCMD Cheatsheet Codecov DOI Lifecycle RStudio Recode SPSS's Stata briatte cheatsheet df dictionnary dplyr gmail joseph larmarange magrittr memisc na natively recode recoded recoding roxygen spss tbl tibble tibbles tidyr unclassed untagged