sjlabelled/0000755000176200001440000000000014046513502012353 5ustar liggesuserssjlabelled/NAMESPACE0000644000176200001440000000374714046506714013614 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method("set_label<-",default) S3method(as_character,data.frame) S3method(as_character,default) S3method(as_factor,data.frame) S3method(as_factor,default) S3method(as_label,data.frame) S3method(as_label,default) S3method(as_labelled,data.frame) S3method(as_labelled,default) S3method(as_labelled,list) S3method(as_numeric,data.frame) S3method(as_numeric,default) S3method(get_label,data.frame) S3method(get_label,default) S3method(get_label,list) S3method(get_labels,data.frame) S3method(get_labels,default) S3method(get_labels,list) S3method(get_na,data.frame) S3method(get_na,default) S3method(get_na,list) S3method(get_values,data.frame) S3method(get_values,default) S3method(get_values,list) S3method(remove_all_labels,data.frame) S3method(remove_all_labels,default) S3method(remove_all_labels,list) export("set_label<-") export(add_labels) export(as_character) export(as_factor) export(as_label) export(as_labelled) export(as_numeric) export(convert_case) export(copy_labels) export(drop_labels) export(fill_labels) export(get_dv_labels) export(get_label) export(get_labels) export(get_na) export(get_term_labels) export(get_values) export(is_labelled) export(label_to_colnames) export(read_data) export(read_sas) export(read_spss) export(read_stata) export(remove_all_labels) export(remove_label) export(remove_labels) export(replace_labels) export(response_labels) export(set_label) export(set_labels) export(set_na) export(term_labels) export(tidy_labels) export(to_character) export(to_factor) export(to_label) export(to_numeric) export(unlabel) export(val_labels) export(var_labels) export(write_sas) export(write_spss) export(write_stata) export(zap_labels) export(zap_na_tags) export(zap_unlabelled) importFrom(insight,find_parameters) importFrom(insight,get_data) importFrom(stats,coef) importFrom(stats,model.frame) importFrom(stats,na.omit) importFrom(stats,terms) importFrom(tools,file_ext) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) sjlabelled/README.md0000644000176200001440000000353413733137543013650 0ustar liggesusers# sjlabelled - Labelled Data Utility Functions [![CRAN_Status_Badge](https://www.r-pkg.org/badges/version/sjlabelled)](https://cran.r-project.org/package=sjlabelled)    [![Documentation](https://img.shields.io/badge/documentation-sjlabelled-orange.svg?colorB=E91E63)](https://strengejacke.github.io/sjlabelled/)    [![downloads](https://cranlogs.r-pkg.org/badges/sjlabelled)](https://cranlogs.r-pkg.org/)    [![total](https://cranlogs.r-pkg.org/badges/grand-total/sjlabelled)](https://cranlogs.r-pkg.org/) This package contains utility functions that are useful when working with labelled data (especially intended for people coming from 'SPSS', 'SAS' or 'Stata' and/or who are new to R). Basically, this package covers reading and writing data between other statistical packages (like 'SPSS') and R, based on the haven and foreign packages; hence, this package also includes functions to make working with labelled data easier. This includes easy ways to get, set or change value and variable label attributes, to convert labelled vectors into factors or numeric (and vice versa), or to deal with multiple declared missing values. ## Installation ### Latest development build To install the latest development snapshot (see latest changes below), type following commands into the R console: ```r library(devtools) devtools::install_github("strengejacke/sjlabelled") ``` ### Officiale, stable release To install the latest stable release from CRAN, type following command into the R console: ```r install.packages("sjlabelled") ``` ## Citation In case you want / have to cite my package, please use `citation('sjlabelled')` for citation information. [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1249215.svg)](https://doi.org/10.5281/zenodo.1249215) sjlabelled/data/0000755000176200001440000000000013446531213013266 5ustar liggesuserssjlabelled/data/efc.RData0000644000176200001440000004213413446531213014744 0ustar liggesusers՝]qߏIJ9 1*TRhjIlHI)AIˡ. ÿ( p/gι V_3ܻKw{{򉧺'ϧ?.|rsXq{?9/ pӍ~<=|o11}z G 7~g9Pv^1r<371?Wü+1y5KA FO> }z6̼cz'ZcB?AS|n<_#}DXK?3UӍaEG||] |+w< A#:;wgc="˾9GUҏqƥ_zKBn#ZW?6<_Q/`ޛh ~艾_"e܏OyѮ71ۀE@# :_ A#K q?x9/{s{ u`a܈@z^MSqsK#O1ޫh˟T1_H/z^釶zM?qߒCr)owcݠ/OET_}ճʺ'fK鳀u81ƕW*QrI^': <1X2?'ᾥy)=~3?Yw=v'/bu\puR;_A .\>E)oH?W/E:G[+u~-d?׸W(uR?!}|:W?-swGWP/;U|kS?\RP柕2x.O|y;=q!wCyP5@/> Aj?ެKXKq8к>SYgJWЯ~+qd\K{ ^tq<ȼsYi\tY${J ^J: ΒCr3I?kS1#7|mٝ 9#zzu%_=S?1{s}y?I/8[xCGXǯοf!A~k|n>on>:v6[w;l]W\v`C7^̍L_Զ|kV@Jyj嶰7Vg?/l?fgW`oIwuH;.k!;;%kGx2N0?'puvz6`I^uwߟM7<0VA7rvj7K|&~ .6o&H?v=̷I?ۼ}7Od$/+`~>o#:\?3-7Q>g`nѿB?f2l5nƃf2щtun?ߏۡ?ߣ 4 q)Xv[</;y-{+}26%g7 ?|Y?ܻ:9\morT==; j|zx/o>]k7}w뻷?y֣ova7?ڽ}p2?:\HkwHÓWǷ6n|{\F:_S_ȢnVhvnWbr̕^R0SKuVuprΕ{vY:s6͛ۆu_t\nn0Wu+Wv}N.VV[+G_;\j#"_ ʿz{4ޚOxY~54>KieG߶?.g-VX[_,pf,]ϭ"zrDmG[~\VX{XJmוKvwy헭c-ut|սُl#ڿ9^}9(jt r?<1Py^7޹OūC~s@߱<8_\?7H. o[wc“<?%:])'r@Kø~Sn'9 >/<ѻ*yGE^]I_>nS阯"=eOqihz4^3›Uz8a\~#{79ܢϗ.)@%\c}/4̧h=Dڲ[mtJԺmK[_zNW Oo?1yWX.ҁOIGqvi#vYDK>`=WP?+qq?!bn/yOr֩Oz_'؍aON|$Ax%/Pv,~=oG%7a>^;ҫ D)zЖi=$&bn.P9[.%+~y+%N/'zN0_v+tcyJ>C'&@G1?3r]p?r2%ҁ?RzOc~ܷ5z~Yߍ^% mօ$!'g_X_^zI<گ/}%~yMyWx|`/r7o~'7_<ެ/qֳzL]%<9x~;OdYx{n7SRIu>wB(zЯ3gQX⇐@yq(~cXп̸諮[9o/<%+w uhړs`ǐ1^YeC /\WħY`?Ճ83wg[z +yӑ<ϕ~Wa4\][n,Oo}JI5uԖ+tYG{.vb r%~C?I}MУnL笲Ϟ^8/׼r4cXn=sm?(x#XG_ %L@KuqCx^;cy$nHo3%}q{ ޿yxߡ]M~re=%K?r,|ov?HO#`yd^^Kٻ^ےR'Tfpc vt0^B>NGϭ#؟{)z.dGD wh;?ҋ镍xrK:(m [0._NHHO'wmܸ=Y9H?қrDuRkE9\6Ku#9"ġ+5ym~ y}*gh◜M؁~4߬[m} m'_m5P7{Ooȅ{9c/7o#*g; n4-ڢ>_6xjkf7|\a$g>Y|E$~oժwEszfv~|%ή ڭvM-$lt,Z9:VIq۹v'ed_W?N~VxY/rY9"~%R޺N<],,.h8590]_3Wv{kdY*:yɟx?+_>Ѻf[+F"<G7?/͏=jj_H?U_`mUY+ja<‹dyuvuCϬ]+^\?\}yx{ucٿk6/6յݣͧ{o~o| thg0zxd?/v<ܟݖz9`2~i6r[zce$_em>\yj#W_Q^Ot1ʩJ }3"lg2ڼG+-C_DՋ= =G"G=Vێ֞W|y]|*[;_9uhk]泥9N/|Ζ1vi=':j9P.;wzWf?N8qէwALt}e;9֏sa^6un=lu~N_t|:9<\]|{pnk)>%744ߝ)"]%^oHVkqU+=f)̋Bx1~fE>B>OI{C70+md%lu^J k)[4Wi=HZ6E둎p3M;QOKw6txxpշ?yǵ'}[xR狭#EYgga>x#T{xw޵'{wG~z:ܻbG{Ǐ %x<sQv|zT񫽊Ke֫HW31JZzH/*%R/Ֆh|55m/[$m_F7ˑJq2K?|5o?+Wv^6+;/h'Yfam !˿8ڼ[$-S7yl})oWЬ<曵cz=gr#y'x= I?N?8GV~VN>'W׆o?[g;݈_ϭ/ οnK{mK'p_M~z: Oh8yulɵZ+O8 U>wnqvOFn?;?zʏQse */̛{^((ݓܸv:޺޵id- z}8^I/SR/d[ԥv #k_a Ul<>eyBIjdo'owT_)"vmK@'WVh>kߢZ׃rYX?ݣ^G%wm;M {8 k"ۊʶ||Nɷz|uw"g{_>~ORkw4e3*W{ίl]Z2}^3xAU脪̸.Ym󣂭q=t ?HV9kS+GMS?ǣ7"͒RO~Xק\9֣^󼾍Ow%ǶjG.|{>ϛ_zlIj=sl-[[A]O/OB'[ӳUpi:o=/={G}%WX_ok^j]Gn'|6Wm{\8΋߈g?I۳;=?z<-M[[xقrUjedx{k1{% L~o'6~ꓵSѺ?(/M8^|#wq'/ȷKtuCv_sr̿]5o18:a~ݝ mIʪ%+_z3'x٧=:kS@ڪ:?g#7'r8}Ο뇑fuH~#YldKϵu{jd_FO{3j[y#ɗpOo?FlkWϤu4ċ괰 }W։lgw"o@7ʱSr=~e-ț1Kl,:inuxۨsGlyYhK;f|v=CF=K-n/#[r'yqUS{ѭo|re[Ns`^<`__{tuc_޺ny9zmm }4vƈ o=+y\HE+od0?^tCZäAn }(g7Ͼo?2. qke>`?}:c$'ac=Pwv6,'Fj5k$K406HwN.rTog3ڳô$l>o͏s`:?ϯW]gwnd_tԝ]xZ us2Nxpqum -a/uJxټh7ylU{ya=A:A^ehlN=_$w+ >ub7 }ڀ({5'Gz٩3'njy"pl3սُK۷nl>ruhospGOo~rxh|a1~ana)7,H%)Oh^q7J#g ތGKo4zFG|Xtt+WV\1=>ɋ#[<8fGnz$ED 9l1c=D$(oZ?-ʳQQߏa8ʧ[NroY97Ѹrg}>+7z&%{ߛ~dvcKᅣʼn|K97y/H>=g=[uG:Oh䭮ϻ) pޢ'ߥOU~>@WyO\j-.<,|~O'~\oѹ2~>֥0~E|J<hk=^Ez0>-<Y?};g,ZG# tdwe? ⧼!. 7\e=ŕK֛y!AtOP~Z e7_w:SkZz x%Otxn.yFrgݸ_)ڥ%ɾokOW.|8W呝ؖ]ŷC~JP~% m/NmzJn[|7ɏ8)'d0/DF~ŋݹ>Gޜ|}u/>^Zi='9 PQPq-ܲV~?2.Z_~]۬/~7[8My4&׾U~zZo5Kayߢ/{eOkHѓZ?~7{].{hRgJ'[= /S?CAy03@?_tx ۏ15]G+3~yZoK/Cvơ_o99[>_o/џCT\Zq]ŏyMtyK_A<_wӱ|7Z֡K9d?7!4_<?+~> ޝ_rtc>̃?] 8_B-91/Z|uȯ?HٕH֏Zr^> &;JN_8:*.e_`(Uih(?)?~S^̓Z5T?U. zNPuH9QفuIr)ym~/g~,}.O0./qo}U's=&Gʝ1]y5=,Жh}ӱ>W<%<8w~,z7O7Y tE7 KQ|q/mn-ʹGa[m+/so{Z kV+ݬZ˥٥_f#p)o7KQ?n~}dqn>*y{㳟FǫO77Gr_|bԯw77}-+sZdjA}ӺY?TٺٽniŏZ~m}PI:Y9y#:q\ܖO3H~>W`Y'!n|v0K&+|yܼu'yynߚjoy9jWoe<[럵v\*oF~ZKGs~Mg);ͷ|ZtnܺfnTim[Rȍg8noךg?=OG{g\|b*ڙ_}pד_n{S ~XM oBw6v0߀~/!~C"i==TM6=#='ziOKoRz: }'~}f7_cbo =d#?zn_)!o+/ /2BWj=} q#?xxq["_yA8 _0O|4q$<͗Ԗj3j3S?}3.zCrreGӏh}v1%%y[m o?^C?\ Z_ E z7ySWu#go$旿n||A3%yDr0иGڕu1e7>yc~$%=92_Ň%-y/_rNqyN}InIx̷_iG~)YGބS^~OPzf9(9eWm"q? 3ga~)3?. _AB?a|g''<楫<_п>Ʃח~Tv\<0nW˼+> _֫Oc\s&4yJ|ȼI:AgisySOw'S?%GE#c޽yoc>9nߡYr`=C=9a\:!ƙ;rn1.3o\|P^ek<Żg;c*ƕאzmVo_~7N>70WA76_8P/{\ y(lOm{~#Wr<_\l2.+.oɲ= ~E|_|Ux0_~sZO-\y/a>?C~C;пwxỤm^/ڃs/<~#uz㤻}(w_a5θѸ:b>=2oH~o:r=W/;t5.zcvg\oŗ?EP{'ѡ=o2}N\O̗'\vD~.=޴ x\/;=7ig]M;0|/WL?Au)/ggޣo(%7W/n>Ÿd[8bGH}is˿k3YH^ {S </~:M|Wv"㹚Jwu7Rnݙ8(huO3@un_\dڢCeJ}俬cO9dc>sʸqu'ϛNb]]6S>wwx]ar.Et_dn_y sk>5Ps8yݹٕʸ&ڍqF?qu#v>t 3.p\=-|9{Gv#9tyu0L?$?w>]1E95׍\}ud?ӯ}c]H.|s\MʟÂyB'}gv;zmwl%O2]2ߺ{<^Sy#y9̘~-@!b_|ڑv7v/ :2;0`~K'sv>?}j^wv^.U} _O+9hmw} ϯ㮎wN;gcwp>;qĽgϼGݜ|FyCGyw>tŗ{n LsuoszoQrw~pu9K}Y;yב~9ǣM\1\tujGuGdwrg}ףs~Wq^;;[eqxwoo?xIn<̠T!02_ 3Y|9--9fZ+%rݞOwq7yC'?W=^󝬣i_'kԓ)|=Y^i~2џտc'@k7)]>>M>rM~+1EyK._c3-Ӟ.nWGoOC8aqXkxw_8?+/Vgsybޡ;9t}1{|"|ܿkW߽.y?Wz=)|^FF}ǟwpr3/Ǽf` ?EƺOv:_tײyG={oꗞy+RN=KvdRaŁzJ% hx>ղ3w0y=.aAs.9<oSx<*._~ui]WЇ_'Cɟyc/ o=vr&~7=ɗ3Ty|_c=~x 9<+~6%<%W:Dk_wp|N<ڼ'~O=X2_puwoځwncJxܟd:/Gre\ODGy/FgtG#zԏ<Wx=Eɟtytksu;܌ϣxB]\B'/W7V{76W&3}*݇џtxsJi]~H7u2McՌ'Пfe>ۢms^;?:O.o~h?rhϲٹVߵ_l܏Շ~j]dX K.?y˵zq&y>=w_繥?p80Zow>o͇/&N--\}wEGYz6>y}j]>Zw;>a[^~tO=ãE5ҟm~x~:9Nn3M:du9>9ڻvwx0d?bF'7yyxǿ<66| ?cUOcXCI?k>\r{#wzB??o=|}aO瑾>r"΢?LyîOwrkZƥw=`;ƹgOeW>?Z:wo=Sظi=!]kW6] 8^dZ=|ܼ}֭&~$G6~D6l>'v#|~_~%~Z~SM =G7+'.ټT~D5`m}\Ol{׋ڻZY:s[yqލvuz#:}rMo#?gUi[o9dv7,q~ާm6.w,ݹ뜥Ϛ08B\eWdS6nRugxɺjB|3[ɞgܼzn ߻9^8kHoğ]GH{sc'9O ΍DA<__wm]q|#;dOvϞkΝ=H} '}'_'KJK]Z;z}j7"'[OFi=Om|ϞsO30eT=ۓT#?o[D=':czig g=7+v ?Or|8x._c|kE%eO&{޻~=YxFKgӟv>x*rѶwE>QY {aq{qY?6gO=KC^Fބ[6|QvqvrLrx[zj^/FFGyr\U&v(<nݢuO>Dkտ>O"y9//ʇv6Nz/ҋ|s5RQYg˻|#{8ΞV`}ks-}g.Q=ЪeNX~VʇKom>qtjjK!~]!jo_{;̷3F٫v=<5x=[Ng,kn|q"!ͻN^]-0̇s풥ptn=E@N(.UG@WQ^7k0 #[ŮKwm>ΛOO/ǣ8~Þ>Yt~I菾,tD\{񥛇ӛ_9 oo^|x^tSZ_Uҵ[/>h|҃Ż͗4_ۼ+mkom^om@Wwu <}Jy}+nӟ<99| [}~;@\o.o}|}Y/~ljOkt'/8;_0sjlabelled/man/0000755000176200001440000000000013675074762013147 5ustar liggesuserssjlabelled/man/write_spss.Rd0000644000176200001440000000200513676611702015625 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/write.R \name{write_spss} \alias{write_spss} \alias{write_stata} \alias{write_sas} \title{Write data to other statistical software packages} \usage{ write_spss(x, path, drop.na = FALSE, compress = FALSE) write_stata(x, path, drop.na = FALSE, version = 14) write_sas(x, path, drop.na = FALSE) } \arguments{ \item{x}{A data frame that should be saved as file.} \item{path}{File path of the output file.} \item{drop.na}{Logical, if \code{TRUE}, tagged \code{NA} values with value labels will be converted to regular NA's. Else, tagged \code{NA} values will be replaced with their value labels. See 'Examples' and \code{\link{get_na}}.} \item{compress}{Logical, if \code{TRUE} and a SPSS-file should be created, saves \code{x} in \code{zsav} (i.e. compressed SPSS) format.} \item{version}{File version to use. Supports versions 8-14.} } \description{ These functions write the content of a data frame to an SPSS, SAS or Stata-file. } sjlabelled/man/remove_label.Rd0000644000176200001440000000201513675077272016067 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_label.R \name{remove_label} \alias{remove_label} \title{Remove variable labels from variables} \usage{ remove_label(x, ...) } \arguments{ \item{x}{A vector or data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} } \value{ \code{x} with removed variable labels } \description{ Remove variable labels from variables. } \examples{ data(efc) x <- efc[, 1:5] get_label(x) str(x) x <- remove_label(x) get_label(x) str(x) } \seealso{ \code{\link{set_label}} to manually set variable labels or \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to add value labels, replacing the existing ones (and removing non-specified value labels). } sjlabelled/man/unlabel.Rd0000644000176200001440000000213113675075616015054 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/unlabel.R \name{unlabel} \alias{unlabel} \title{Convert labelled vectors into normal classes} \usage{ unlabel(x, verbose = FALSE) } \arguments{ \item{x}{A data frame, which contains \code{labelled} class vectors or a single vector of class \code{labelled}.} \item{verbose}{Logical, if \code{TRUE}, a progress bar is displayed that indicates the progress of converting the imported data.} } \value{ A data frame or single vector (depending on \code{x}) with common object classes. } \description{ This function converts \code{labelled} class vectors into a generic data format, which means that simply all \code{labelled} class attributes will be removed, so all vectors / variables will most likely become \code{atomic}. } \note{ This function is currently only used to avoid possible compatibility issues with \code{\link[haven:labelled]{labelled}} class vectors. Some known issues with \code{labelled} class vectors have already been fixed, so it might be that this function will become redundant in the future. } sjlabelled/man/read_spss.Rd0000644000176200001440000000603213675075616015421 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/read.R \name{read_spss} \alias{read_spss} \alias{read_sas} \alias{read_stata} \alias{read_data} \title{Import data from other statistical software packages} \usage{ read_spss( path, atomic.to.fac = FALSE, drop.labels = FALSE, tag.na = FALSE, enc = NULL, verbose = FALSE ) read_sas( path, path.cat = NULL, atomic.to.fac = FALSE, drop.labels = FALSE, enc = NULL, verbose = FALSE ) read_stata( path, atomic.to.fac = FALSE, drop.labels = FALSE, enc = NULL, verbose = FALSE ) read_data( path, atomic.to.fac = FALSE, drop.labels = FALSE, enc = NULL, verbose = FALSE ) } \arguments{ \item{path}{File path to the data file.} \item{atomic.to.fac}{Logical, if \code{TRUE}, categorical variables imported from the dataset (which are imported as \code{atomic}) will be converted to factors.} \item{drop.labels}{Logical, if \code{TRUE}, unused value labels are removed. See \code{\link{drop_labels}}.} \item{tag.na}{Logical, if \code{TRUE}, missing values are imported as \code{\link[haven:tagged_na]{tagged_na}} values; else, missing values are converted to regular \code{NA} (default behaviour).} \item{enc}{The character encoding used for the file. This defaults to the encoding specified in the file, or UTF-8. Use this argument to override the default encoding stored in the file.} \item{verbose}{Logical, if \code{TRUE}, a progress bar is displayed that indicates the progress of converting the imported data.} \item{path.cat}{Optional, the file path to the SAS catalog file.} } \value{ A data frame containing the imported, labelled data. Retrieve value labels with \code{\link{get_labels}} and variable labels with \code{\link{get_label}}. } \description{ Import data from SPSS, SAS or Stata, including NA's, value and variable labels. } \details{ These read-functions behave slightly differently from \pkg{haven}'s read-functions: \itemize{ \item The vectors in the returned data frame are of class \code{atomic}, not of class \code{labelled}. The labelled-class might cause issues with other packages. \item When importing SPSS data, variables with user defined missings \emph{won't} be read into \code{labelled_spss} objects, but imported as \emph{tagged NA values}. } The \code{atomic.to.fac} option only converts those variables into factors that are of class \code{atomic} and which have value labels after import. Atomic vectors without value labels are considered as continuous and not converted to factors. } \note{ These are wrapper functions for \CRANpkg{haven}'s \code{read_*}-functions. } \examples{ \dontrun{ # import SPSS data set. uses haven's read function mydat <- read_spss("my_spss_data.sav") # use haven's read function, convert atomic to factor mydat <- read_spss("my_spss_data.sav", atomic.to.fac = TRUE) # retrieve variable labels mydat.var <- get_label(mydat) # retrieve value labels mydat.val <- get_labels(mydat)} } \seealso{ Vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}. } sjlabelled/man/get_na.Rd0000644000176200001440000000455514046441051014662 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_na.R \name{get_na} \alias{get_na} \title{Retrieve tagged NA values of labelled variables} \usage{ get_na(x, as.tag = FALSE) } \arguments{ \item{x}{Variable (vector) with value label attributes, including tagged missing values (see \code{\link[haven:tagged_na]{tagged_na()}}); or a data frame or list with such variables.} \item{as.tag}{Logical, if \code{TRUE}, the returned values are not tagged NA's, but their string representative including the tag value. See 'Examples'.} } \value{ The tagged missing values and their associated value labels from \code{x}, or \code{NULL} if \code{x} has no tagged missing values. } \description{ This function retrieves tagged NA values and their associated value labels from a labelled vector. } \details{ Other statistical software packages (like 'SPSS' or 'SAS') allow to define multiple missing values, e.g. \emph{not applicable}, \emph{refused answer} or "real" missing. These missing types may be assigned with different values, so it is possible to distinguish between these missing types. In R, multiple declared missings cannot be represented in a similar way with the regular missing values. However, \code{tagged_na()} values can do this. Tagged \code{NA}s work exactly like regular R missing values except that they store one additional byte of information: a tag, which is usually a letter ("a" to "z") or character number ("0" to "9"). This allows to indicate different missings. \cr \cr Furthermore, see 'Details' in \code{\link{get_values}}. } \examples{ library(haven) x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) # get current NA values x get_na(x) # which NA has which tag? get_na(x, as.tag = TRUE) # replace only the NA, which is tagged as NA(c) if (require("sjmisc")) { replace_na(x, value = 2, tagged.na = "c") get_na(replace_na(x, value = 2, tagged.na = "c")) # data frame as input y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) get_na(data.frame(x, y)) } } sjlabelled/man/set_labels.Rd0000644000176200001440000002020514046446774015551 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/set_labels.R, R/val_labels.R \name{set_labels} \alias{set_labels} \alias{val_labels} \title{Add value labels to variables} \usage{ set_labels( x, ..., labels, force.labels = FALSE, force.values = TRUE, drop.na = TRUE ) val_labels(x, ..., force.labels = FALSE, force.values = TRUE, drop.na = TRUE) } \arguments{ \item{x}{A vector or data frame.} \item{...}{For \code{set_labels()}, Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. \cr \cr For \code{val_labels()}, pairs of named vectors, where the name equals the variable name, which should be labelled, and the value is the new variable label. \code{val_labels()} also supports quasi-quotation (see 'Examples').} \item{labels}{(Named) character vector of labels that will be added to \code{x} as \code{"labels"} or \code{"value.labels"} attribute. \itemize{ \item if \code{labels} is \strong{not} a \emph{named vector}, its length must equal the value range of \code{x}, i.e. if \code{x} has values from 1 to 3, \code{labels} should have a length of 3; \item if length of \code{labels} is intended to differ from length of unique values of \code{x}, a warning is given. You can still add missing labels with the \code{force.labels} or \code{force.values} arguments; see 'Note'. \item if \code{labels} \strong{is} a \emph{named vector}, value labels will be set accordingly, even if \code{x} has a different length of unique values. See 'Note' and 'Examples'. \item if \code{x} is a data frame, \code{labels} may also be a \code{list} of (named) character vectors; \item if \code{labels} is a \code{list}, it must have the same length as number of columns of \code{x}; \item if \code{labels} is a vector and \code{x} is a data frame, \code{labels} will be applied to each column of \code{x}. } Use \code{labels = ""} to remove labels-attribute from \code{x}.} \item{force.labels}{Logical; if \code{TRUE}, all \code{labels} are added as value label attribute, even if \code{x} has less unique values then length of \code{labels} or if \code{x} has a smaller range then length of \code{labels}. See 'Examples'. This parameter will be ignored, if \code{labels} is a named vector.} \item{force.values}{Logical, if \code{TRUE} (default) and \code{labels} has less elements than unique values of \code{x}, additional values not covered by \code{labels} will be added as label as well. See 'Examples'. This parameter will be ignored, if \code{labels} is a named vector.} \item{drop.na}{Logical, whether existing value labels of tagged NA values (see \code{\link[haven:tagged_na]{tagged_na}}) should be removed (\code{drop.na = TRUE}, the default) or preserved (\code{drop.na = FALSE}). See \code{\link{get_na}} for more details on tagged NA values.} } \value{ \code{x} with value label attributes; or with removed label-attributes if \code{labels = ""}. If \code{x} is a data frame, the complete data frame \code{x} will be returned, with removed or added to variables specified in \code{...}; if \code{...} is not specified, applies to all variables in the data frame. } \description{ This function adds labels as attribute (named \code{"labels"}) to a variable or vector \code{x}, resp. to a set of variables in a data frame or a list-object. A use-case is, for instance, the \pkg{sjPlot}-package, which supports labelled data and automatically assigns labels to axes or legends in plots or to be used in tables. \code{val_labels()} is intended for use within pipe-workflows and has a tidyverse-consistent syntax, including support for quasi-quotation (see 'Examples'). } \note{ \itemize{ \item if \code{labels} is a named vector, \code{force.labels} and \code{force.values} will be ignored, and only values defined in \code{labels} will be labelled; \item if \code{x} has less unique values than \code{labels}, redundant labels will be dropped, see \code{force.labels}; \item if \code{x} has more unique values than \code{labels}, only matching values will be labelled, other values remain unlabelled, see \code{force.values}; } If you only want to change partial value labels, use \code{\link{add_labels}} instead. Furthermore, see 'Note' in \code{\link{get_labels}}. } \examples{ if (require("sjmisc")) { dummy <- sample(1:4, 40, replace = TRUE) frq(dummy) dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) frq(dummy) # assign labels with named vector dummy <- sample(1:4, 40, replace = TRUE) dummy <- set_labels(dummy, labels = c("very low" = 1, "very high" = 4)) frq(dummy) # force using all labels, even if not all labels # have associated values in vector x <- c(2, 2, 3, 3, 2) # only two value labels x <- set_labels(x, labels = c("1", "2", "3")) x frq(x) # all three value labels x <- set_labels(x, labels = c("1", "2", "3"), force.labels = TRUE) x frq(x) # create vector x <- c(1, 2, 3, 2, 4, NA) # add less labels than values x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = FALSE) x # add all necessary labels x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = TRUE) x # set labels and missings x <- c(1, 1, 1, 2, 2, -2, 3, 3, 3, 3, 3, 9) x <- set_labels(x, labels = c("Refused", "One", "Two", "Three", "Missing")) x set_na(x, na = c(-2, 9)) } if (require("haven") && require("sjmisc")) { x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) # get current NA values x get_na(x) # lose value labels from tagged NA by default, if not specified set_labels(x, labels = c("New Three" = 3)) # do not drop na set_labels(x, labels = c("New Three" = 3), drop.na = FALSE) # set labels via named vector, # not using all possible values data(efc) get_labels(efc$e42dep) x <- set_labels( efc$e42dep, labels = c(`independent` = 1, `severe dependency` = 2, `missing value` = 9) ) get_labels(x, values = "p") get_labels(x, values = "p", non.labelled = TRUE) # labels can also be set for tagged NA value # create numeric vector x <- c(1, 2, 3, 4) # set 2 and 3 as missing, which will automatically set as # tagged NA by 'set_na()' x <- set_na(x, na = c(2, 3)) x # set label via named vector just for tagged NA(3) set_labels(x, labels = c(`New Value` = tagged_na("3"))) # setting same value labels to multiple vectors dummies <- data.frame( dummy1 = sample(1:4, 40, replace = TRUE), dummy2 = sample(1:4, 40, replace = TRUE), dummy3 = sample(1:4, 40, replace = TRUE) ) # and set same value labels for two of three variables test <- set_labels( dummies, dummy1, dummy2, labels = c("very low", "low", "mid", "hi") ) # see result... get_labels(test) } # using quasi-quotation if (require("rlang") && require("dplyr")) { dummies <- data.frame( dummy1 = sample(1:4, 40, replace = TRUE), dummy2 = sample(1:4, 40, replace = TRUE), dummy3 = sample(1:4, 40, replace = TRUE) ) x1 <- "dummy1" x2 <- c("so low", "rather low", "mid", "very hi") dummies \%>\% val_labels( !!x1 := c("really low", "low", "a bit mid", "hi"), dummy3 = !!x2 ) \%>\% get_labels() # ... and named vectors to explicitly set value labels x2 <- c("so low" = 4, "rather low" = 3, "mid" = 2, "very hi" = 1) dummies \%>\% val_labels( !!x1 := c("really low" = 1, "low" = 3, "a bit mid" = 2, "hi" = 4), dummy3 = !!x2 ) \%>\% get_labels(values = "p") } } \seealso{ See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} for more details; \code{\link{set_label}} to manually set variable labels or \code{\link{get_label}} to get variable labels; \code{\link{add_labels}} to add additional value labels without replacing the existing ones. } sjlabelled/man/sjlabelled-package.Rd0000644000176200001440000000146413446531213017116 0ustar liggesusers\encoding{UTF-8} \name{sjlabelled-package} \alias{sjlabelled-package} \alias{sjlabelled} \docType{package} \title{Labelled Data Utility Functions} \description{ \strong{Purpose of this package} Collection of miscellaneous utility functions (especially intended for people coming from other statistical software packages like 'SPSS', and/or who are new to R), supporting following common tasks when working with labelled data: \itemize{ \item Reading and writing data between R and other statistical software packages like 'SPSS', 'SAS' or 'Stata' \item Easy ways to get, set and change value and variable label attributes, to convert labelled vectors into factors (and vice versa), or to deal with multiple declared missing values etc. } } \author{ Daniel Lüdecke \email{d.luedecke@uke.de} } sjlabelled/man/term_labels.Rd0000644000176200001440000000616214046441051015712 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_model_labels.R \name{term_labels} \alias{term_labels} \alias{get_term_labels} \alias{response_labels} \alias{get_dv_labels} \title{Retrieve labels of model terms from regression models} \usage{ term_labels( models, mark.cat = FALSE, case = NULL, prefix = c("none", "varname", "label"), ... ) get_term_labels( models, mark.cat = FALSE, case = NULL, prefix = c("none", "varname", "label"), ... ) response_labels(models, case = NULL, multi.resp = FALSE, mv = FALSE, ...) get_dv_labels(models, case = NULL, multi.resp = FALSE, mv = FALSE, ...) } \arguments{ \item{models}{One or more fitted regression models. May also be glm's or mixed models.} \item{mark.cat}{Logical, if \code{TRUE}, the returned vector has an attribute with logical values, which indicate whether a label indicates the value from a factor category (attribute value is \code{TRUE}) or a term's variable labels (attribute value is \code{FALSE}).} \item{case}{Desired target case. Labels will automatically converted into the specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for more details on this argument.} \item{prefix}{Indicates whether the value labels of categorical variables should be prefixed, e.g. with the variable name or variable label. May be abbreviated. See 'Examples',} \item{...}{Further arguments passed down to \code{to_any_case()}, like \code{preprocess} or \code{postprocess}.} \item{mv, multi.resp}{Logical, if \code{TRUE} and \code{models} is a multivariate response model from a \code{brmsfit} object, then the labels for each dependent variable (multiple responses) are returned.} } \value{ For \code{term_labels()}, a (named) character vector with variable labels of all model terms, which can be used, for instance, as axis labels to annotate plots. \cr \cr For \code{response_labels()}, a character vector with variable labels from all dependent variables of \code{models}. } \description{ This function retrieves variable labels from model terms. In case of categorical variables, where one variable has multiple dummies, variable name and category value is returned. } \details{ Typically, the variable labels from model terms are returned. However, for categorical terms that have estimates for each category, the value labels are returned as well. As the return value is a named vector, you can easily use it with \pkg{ggplot2}'s \code{scale_*()} functions to annotate plots. } \examples{ # use data set with labelled data data(efc) fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) term_labels(fit) # make "education" categorical if (require("sjmisc")) { efc$c172code <- to_factor(efc$c172code) fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) term_labels(fit) # prefix value of categorical variables with variable name term_labels(fit, prefix = "varname") # prefix value of categorical variables with value label term_labels(fit, prefix = "label") # get label of dv response_labels(fit) } } sjlabelled/man/get_values.Rd0000644000176200001440000000422413675074762015576 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_values.R \name{get_values} \alias{get_values} \title{Retrieve values of labelled variables} \usage{ get_values(x, sort.val = TRUE, drop.na = FALSE) } \arguments{ \item{x}{Variable (vector) with value label attributes; or a data frame or list with such variables.} \item{sort.val}{Logical, if \code{TRUE} (default), values of associated value labels are sorted.} \item{drop.na}{Logical, if \code{TRUE}, tagged NA values are excluded from the return value. See 'Examples' and \code{\link{get_na}}.} } \value{ The values associated with value labels from \code{x}, or \code{NULL} if \code{x} has no label attributes. } \description{ This function retrieves the values associated with value labels from \code{\link[haven]{labelled}} vectors. Data is also labelled when imported from SPSS, SAS or STATA via \code{\link{read_spss}}, \code{\link{read_sas}} or \code{\link{read_stata}}. } \details{ \code{\link[haven]{labelled}} vectors are numeric by default (when imported with read-functions like \code{\link{read_spss}}) and have variable and value labels attributes. The value labels are associated with the values from the labelled vector. This function returns the values associated with the vector's value labels, which may differ from actual values in the vector (e.g. if not all values have a related label). } \examples{ data(efc) str(efc$e42dep) get_values(efc$e42dep) get_labels(efc$e42dep) library(haven) x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) # get all values get_values(x) # drop NA get_values(x, drop.na = TRUE) # data frame as input y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) get_values(data.frame(x, y)) } \seealso{ \code{\link{get_labels}} for getting value labels and \code{\link{get_na}} to get values for missing values. } sjlabelled/man/figures/0000755000176200001440000000000013446531213014574 5ustar liggesuserssjlabelled/man/figures/logo.png0000644000176200001440000003572313446531213016254 0ustar liggesusersPNG  IHDRxb]etEXtSoftwareAdobe ImageReadyqe<;uIDATx}wՕ:O(K`Iaـ~aMEƻ/^Pe`DV I(Kg:wUwN= ]3Qu׭ 3}Uоg^'ne.oNӁpq|wL;ƃ)9gOnkcjȅ(@D>@>}E:牧ʟ\g>MOmAPAyŝADP(li(2XT!W߽dޔɲMfY?¶j-5xtݮ1\nÇ5ˤSj"1& ~/@U5r Etr?/\> .uwW^yzY?z}B/&Tߌk'+<J(KPPT8ڨ~^^ȯH8{t׽.[L۰Aʛ'Ч?:A_X p C0 %AFXw*Jw AmZ%nY߫E\XNn.D#(UT& }Zx_^?2JP4!"Kv?!N 3MgqN.!Dݒ_ѱ|>N׊mͲY%e› ȐHQ(<.:9/Kv$E(3Ԫj+RuHȈ&Kc{Quϔ%9Ӛk6"55gOB"F<, ;Q? g{ῇ>1vEXI UD\?r{a= x.^cC rMDb1H')me;v c˾1G9#&ڴMX&wpg4@Q MdQdyUe2 ? A3K8h,@}زdDƬӎ4AjV Qkh%bTrNrU '$-X4ò o"_?cz1ڎq|F/q"IԘB ݩ<{lVwa?TYJ#2! O2V.p-jh'CЩHI˓k p[O?aS }op+غp#wC4^ zi nOOwngc[wP6</]f%ݪV?Ї #zؓNg}7O~CgkncSۧBeDkRVд[bFt-ЯT> Bnn9Nb)pRts})Q'`S[qx0K} c>\DDEA4@?$Z? /zӴA$PaTZRfLF {CܪnrHYD lAB E~se8;U\;M9lLfPt_q&](ϧ<+YF(0K/vŸξ L0RD|\ nvEB"\6,>7)=Pȁ(kqt)[Ѽ뀵/diA$X;g  \%Blby1aGw ^Ji"{&v`;`U΂ ^i؏WcX{QnpQs&5&tҒ "TۙTcxܸh| U +mؕ&?nieFy>)؀nK˰&={t5m˗k iˊ*򵏽 ]pxvVP)Hpt oCuvk01rLݱ*բcq>qWQᔢ6\~De Q8؈EQ3iyt߽dĕvؾ^ Qj8U( %~t{6Sx+hgG $H# ѡjl{젟TV,UJM7}gheq]'ӳU؃F}ִ琹œ[D/ULvlOkoW%tùl^$HnNF%v(O׏e~*1]?p+/[EyY K!DQJ( L(m`ИtIISt[}B#Ci ݂\t{Fr8TEBiήAHvJ#4^Zπ%WʦLli͔d,ֆ50>! eYD&T~^5fHS'UZ#Hٟx5w#tg5<ύ0 ~‘( 5[V?JӲ[)o pJ Pљ )k^<*LE`,Ǔt`hVS$$]66VUcN7]&9 l%Pmנ[E_UH ,P'JK>bh `}}[ +WuE~kDM"T,#;#*Uӟժfz8Ȩf `XeUDd5dC `=s{<n9Pi5b2GҸUHX[\%jX xgˣD%\.GzBՋx}Y`jU{fY J2+{xKt >.ň&isJ9y|J3 _G`Qu1}5^֋J2ɲF<3v?BFZU/_p\5a Z@d ]0$.t$NtIDBE;#:)gR"Vc JKzH_ ն"./޻d?Fo+l@d7QCi$2Yf" =q"ybgA>iC٪Yֻ ۥ .A'7%m5T(h:Y(BR8q҅dve-94 NZqBu)4k9oTJ .;s]$qͨ!ԜE_iI>@L$QʞZ"jT\Fp5=7)؟^cQekUnwl;y*!@IȬlCn 8%zTm'AA5v8ɬG_؉H %(TB ʳ\}sd (Q(-9]g+,~eGQjM?=@VO;P\sŋ?Kgfga0wb*s4MPM(l?t|vظL`ڥy9[Rd*[PфvEc!=SBLZH^EE?lfNIOQ5;Vbڒp7 RNSEOe{Xlot~N1vdKj@Zۆ͟Syx=f`\J~sE#Ė*y/MEOf>IV})/=#ªY"2?B 7T`{a{Ao?8)) /Q:͒uQnbiO_~mCoy#_D)c>B 沘  [2cOvHSQX`l~MXyֵHfvt=+Kl. =:OԎ7yd[MA&y[v oOɋC٣an/{Cнݯ*.Pxk fjI 2Ը& Mpk4HnI5C̟P͖TGJWq<ܻo\?IcnVc@A ;LjG! #8`HEhI廙,Eby<:&. ݾZ,nnV_)abV7,fw]1hRi0 cL:xz,33Z=Jzϼ[~}< "Md*rgh:TҹWO/ER` \>VtfM>z>k⸹C=Io3nh٥Ec3(v׍"1d \jm3y.c2r0 *l<`0].!E[H6gqr+ p*KB ͉YFgtM/IAעI4Î{&iLf8\:H }/>4DHԣȾ%_JkTh5" $r.ʉ$68]h n[kw0ɲs|ڜ-;͊ lAߥϞb2D+vOζjL3< %wlA@"+ܟmCPc bGUd$T.g6q gqcWǯȑK|J*@"]ŮM~"?:Q+"Ŝo9*modPQH ?,y>NTkc-K٤oyH-WgAY$oʖub}mLHsp6}-z|c[%i1J;'w%77qo(Eev{j35hQy(;ؔ6C\*)-㪞} 3K }a*b]".hOI=W( I~`? c6!-]Ц=kJf{v\V=8V)_\U#Ipz45ye;~v&p: E xl#2IJ`~Iي\o LU2H),,O 4=aDC FK@K]%'}8*s ^GK^Y r!uiCJ;>dkӴ5( l2hlQzqM x 5vqVnυm`=3u11h+Ji}46'̵Ӑ+c*0qHP{֖; z?p 3@/ &dwAEn9$ 2̨,骕@$ XMpHUoƾ9 U94B u*$X> @!P.)9c{~Avݻg1SYC)0sx=?ظ՝m~0}=Dg:p"Y{P0K3:'^#r2TT"hZfNjZ1$F_- }FQhн/Py@R;wƖw,fwG_%r/ &I0ǟ[r3z\<ɬ F6r\݉3{C(/{u=%+\asZ^}B66eѤj[QBjT6dH%scYnnM cKFӋ7lx}VŽw~En[y]6VfMi%jXa>n#JO L "H223 9E!͂Az.p[SZ4Qm)\ w ;3疍x~MX^YևʑKȗ9F:QBcm$1l;+NnIh:Ip]M 35(a $;iyJjsI[%g_%][Y F¥m#6C4=i}xX}sw%&`n"d#%`o>X0r{/0a.h]\eBژLNgIHsEx!2<ܗAAz`m Ns=`9@I10im |{{{6~}В=]%FpDƸoL[l`205n;m1r@}lιwX0[Bh^΍5 8cnNmr 5PѴdsY9R$c "ܞt;ԍ~] ^t'o!߂TEW8_ɧCO btOQW+EnK۵m/̹fJIObcoV$K?ouW.Uz8t+3=C͡MpnsnfqT'9^ *r&Gɪsh*I{F'G*C=)\)Ffb[`n̥>\nCC[auc _X8 `/uCO7v AuK#SPrQ .?s\$!?wkO;UFzf#/Ic77p<#zZ'}nI郕w_DQ]z+@c,Dq Bx)˿Ǫk &IXO}?j~KYK/ Og0,rS-JW\s ; \#^%`9IN ~P8 z64qRh3ȘUؼjk +!p-_y_7XW>Mwg^=rONj`ެЇL pἻ6|@m Y\v qHz0~{Mn%W!Wm"JFjvU4+(EgVgw_ǜ ]N87v|.'`nhyG'jd!oQ0`bny͡>_T`uv8:{"Uk3eVl;uDXr9`P.*åSZcYr0rj1hScL |}I0+J4q Z gvR]лJuږM3\2p.y c^aI~)H@ehmH]=Ȫ Jً7rT*W޷RTX1)@g T_au1pUA4|I:Mbt]k"on.,Z{} !zO41ZnM9B8,t%3_d`I0cx?`ά5)_W!8=wL+jA cG|͡[Dr!Pzض L45=KZ``9sȗM2ᎁd5[_(;f#thy777B뀡 IgF\GY !U-ȶ J޴Ʈ͕wA줏yo v)`6Ev5ZhVecq=s).M?T*  :- / jKCЛ9.O2[=&wz%VE;pLVιkdKC#D`݂U9T]3a\ES{ka_''^hEDQ]fg$;2w1S{t8N "ѝRL* {tNƽBU:Yq{ܿ,P ,sUZKN;m0rgFUqZxx%-&%A 9 v{$t EF\>&ϹTHvM-a|i06Yzz<%hsErȝCY:lp/ 2mۢ ((({I2M6x]T6"K88yqW6dU8/mIVd"tR8lq=wco0!})3 FQ f'@ t}pMȠ"BN 'S 7p'yieޤ؎"vYB>Y: 2>(tLUРψQWGD$ǣO`uC>A6Τ>wb;r/-#IҼArP G`86,I+!֊?`W^I>~,"%lWӓ wA=MEVϙ pUUcҬu02MGa}T-KA \]i64S >̑1d̖!?>((vj*Q2{zzt큮t3hOIբ̈́P32fA VE縅A^ _UЈEzVKD8WhJhT@v'Bg0>.6%"0':n Vِ[-ClTIg-,窰In H)R{ST#}p%3ПG 5[w2H﮽E!8pȠW .L ɲ,Jla磉D(ICQ]S[jȚLN+ QqlqҀM'A٢r[ QuGA EG)YY,U}%oA8`W^HAş G0षrfQÂVkìCv,"-501y<QBX\L9P']wxoJR bLZ%].ߕ$ ނ4戈Y|O[NYD4(LNع;663t8dT<Nu u[:ᭊ3d>51U+yb,B~xύ <[W;,hf2岙 ^paUoxdJ% qÅsPR LЏv|PBjP0hUoPfťU>ſIT#b:R4 hpD&mxvB__w~_}yqarpݵc/˹z|K,^SI'A" X޾y8v#1bspSNm#G{aϱ~$AkSUgXčܞ窄  sQsEPQkr8E%9C*߇=q}@?V5mj7qi,N7]S(|>[(@grUK(ћHqEqV>tk3V:I,ԶE2idSP<@t8#΄qU>E?~͙IW9{|6ӊYw‘؅8G¢ ѵdEW 8T4S΂w9AN9aW?(Z(.ny͵਎cE^CQEy|V۔HmbZWÄpAf,GVJ[B,[jj cBog4>5sq%D;{ΆqzDҤh2',yc7Zn@gJ6Jڎxww_Dl6o@! ww'a=Rq&̈́d%\.T68`O )lZ>g= ,>؋o޶„ggWАHb8\&An\Ce4UO2Ξpfl:W4Daተn)dL8 3>7,uUkc|P"Uraլ]ߞ{XD-gpZ=WP ݠ̘Tۛ׏Ry%)x) >UrVl_)C29;:GB$F̤`_?3/Ο wm˾δ!%/?귑.A,f?*h)շIx%SA(PJ. ftЭE*պ:#I)C8/8LB^Ջ.qcUETȦYG*}Iu][ l5zlͮvMоyŃ>&\$tsUNmDDvjb˨ޡ.D:٧Kig?쥍hglzJ$ ko\0q4F(daMFgyhŁ.ES&}dkH\k$JlwU"sZj\t{29*tz_i%^xA@}ڍs lmt; Hip 2q^f m3$3͇jWB|@y*׷EQDp/CX{Kr$D3xH#.&q9i =tf{RZp8i*f ŷu!˦ZAʾ8 w}& ͨ&.@;; /2XH4&\%tpJi<'*_w)px4?įDmfg)Kn Sؓ3ϛЭc_(9R6Vب0{{x11cڸoȟE yE\C)JL* GSV.m:m\>恓?#k?}=Id2)ZPl[n$J`ZXT fHaۃ^v▄ngcH*FcˤϖΟߊ@M`Q`|ɶu"⎹]#*W06T/"{A`=3BT.]{S!,˗r|t{ajS^$Od>H$B.O!y6z6g1)jȲ.`I ?̦(IUH*ҕPmn=G4S&F3U7 q5$ լ"?IZ[JvVQb$guFtn~d[^TRZrx4g߫"FEJAEBE {R/=סMaVY3mgVed J%f";;& s[ka4M,NY<ᣄK.>7 kg Zfi|~iEjرgq1 ;' 6q32(=QPX[ήAxmt&qM@gBD@R( /RR5%Ē46LދRYҨ'(¸e;mNEiIZAAlDpЭ}O+CaզUDVYmnǡ('j ^yO i ݞ-P,%߉˔ƣ"sxü-3i$vvRckA%Y&hgyTI"igN3IX(m8dJnoF⹗E4^f0Gw 4{nX0v/a(Nӥ#7Qo?Mj@n-肌P yy"s [*H(*mԆ̧?hw棝"]J*B6d߇4ƅ3-`lPO7`O{Fm(qQQ蚭A!K?G))쩧%Sr:`hHŶ).e{犨8ԠۣzV_%?A[pf+!)~t8xŝU~R*y!I' %Jl3ն*(EpѲTs PQj&,?qKiAſ> }7nFcmTMtPETp$i%Wf ;E|%#j Gp k -0[EvȬCw񅓑; pFE7Ot+xM}5(E WŘU\^s_-irW |M==i~xm,ɖl4s5cقao^-%&xަQp |ų*Yk[h7 /!ն&ř(vDunzُ,G}l/2y: 6M%ieĻ‘ȹ\">Cz tj/JݢWLWݷaيp1ఆ[⿏>y  Jk Vl=M9`i/CJ}X 0]:IENDB`sjlabelled/man/remove_all_labels.Rd0000644000176200001440000000177313675074762017115 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/remove_all_labels.R \name{remove_all_labels} \alias{remove_all_labels} \title{Remove value and variable labels from vector or data frame} \usage{ remove_all_labels(x) } \arguments{ \item{x}{Vector or \code{data.frame} with variable and/or value label attributes} } \value{ \code{x} with removed value and variable label attributes. } \description{ This function removes value and variable label attributes from a vector or data frame. These attributes are typically added to variables when importing foreign data (see \code{\link{read_spss}}) or manually adding label attributes with \code{\link{set_labels}}. } \examples{ data(efc) str(efc) str(remove_all_labels(efc)) } \seealso{ See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}, and \code{\link{copy_labels}} for adding label attributes (subsetted) data frames. } sjlabelled/man/efc.Rd0000644000176200001440000000103413675074762014171 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/efc.R \docType{data} \name{efc} \alias{efc} \title{Sample dataset from the EUROFAMCARE project} \description{ A SPSS sample data set, imported with the \code{\link{read_spss}} function. } \examples{ # Attach EFC-data data(efc) # Show structure str(efc) # show first rows head(efc) # show variables \dontrun{ library(sjPlot) view_df(efc) # show variable labels get_label(efc) # plot efc-data frame summary sjt.df(efc, altr.row.col = TRUE)} } \keyword{data} sjlabelled/man/set_label.Rd0000644000176200001440000000726413675074762015401 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/set_label.R, R/var_labels.R \name{set_label} \alias{set_label} \alias{set_label<-} \alias{var_labels} \title{Add variable label(s) to variables} \usage{ set_label(x, label) set_label(x) <- value var_labels(x, ...) } \arguments{ \item{x}{Variable (vector), list of variables or a data frame where variables labels should be added as attribute. For \code{var_labels()}, \code{x} must be a data frame only.} \item{label}{If \code{x} is a vector (single variable), use a single character string with the variable label for \code{x}. If \code{x} is a data frame, use a vector with character labels of same length as \code{ncol(x)}. Use \code{label = ""} to remove labels-attribute from \code{x}, resp. set any value of vector \code{label} to \code{""} to remove specific variable label attributes from a data frame's variable.} \item{value}{See \code{label}.} \item{...}{Pairs of named vectors, where the name equals the variable name, which should be labelled, and the value is the new variable label.} } \value{ \code{x}, with variable label attribute(s), which contains the variable name(s); or with removed label-attribute if \code{label = ""}. } \description{ This function adds variable labels as attribute (named \code{"label"}) to the variable \code{x}, resp. to a set of variables in a data frame or a list-object. \code{var_labels()} is intended for use within pipe-workflows and has a tidyverse-consistent syntax, including support for quasi-quotation (see 'Examples'). } \examples{ # manually set value and variable labels dummy <- sample(1:4, 40, replace = TRUE) dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) dummy <- set_label(dummy, label = "Dummy-variable") # or use: # set_label(dummy) <- "Dummy-variable" # auto-detection of value labels by default, auto-detection of # variable labels if argument "title" set to NULL. \dontrun{ library(sjPlot) sjp.frq(dummy, title = NULL)} # Set variable labels for data frame dummy <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) dummy <- set_label(dummy, c("Variable A", "Variable B", "Variable C")) str(dummy) # remove one variable label dummy <- set_label(dummy, c("Variable A", "", "Variable C")) str(dummy) # setting same variable labels to multiple vectors # create a set of dummy variables dummy1 <- sample(1:4, 40, replace = TRUE) dummy2 <- sample(1:4, 40, replace = TRUE) dummy3 <- sample(1:4, 40, replace = TRUE) # put them in list-object dummies <- list(dummy1, dummy2, dummy3) # and set variable labels for all three dummies dummies <- set_label(dummies, c("First Dummy", "2nd Dummy", "Third dummy")) # see result... get_label(dummies) # use 'var_labels()' to set labels within a pipe-workflow, and # when you need "tidyverse-consistent" api. # Set variable labels for data frame dummy <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) library(magrittr) dummy \%>\% var_labels(a = "First variable", c = "third variable") \%>\% get_label() # with quasi-quotation library(rlang) v1 <- "First variable" v2 <- "Third variable" dummy \%>\% var_labels(a = !!v1, c = !!v2) \%>\% get_label() x1 <- "a" x2 <- "c" dummy \%>\% var_labels(!!x1 := !!v1, !!x2 := !!v2) \%>\% get_label() } \seealso{ See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} for more details; \code{\link{set_labels}} to manually set value labels or \code{\link{get_label}} to get variable labels. } sjlabelled/man/zap_na_tags.Rd0000644000176200001440000000251713675077272015730 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/zap_labels.R \name{zap_na_tags} \alias{zap_na_tags} \title{Convert tagged NA values into regular NA} \usage{ zap_na_tags(x, ...) } \arguments{ \item{x}{A \code{\link[haven:labelled]{labelled()}} vector with \code{tagged_na} values, or a data frame with such vectors.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} } \value{ \code{x}, where all \code{tagged_na} values are converted to \code{NA}. } \description{ Replaces all \code{\link[haven:tagged_na]{tagged_na()}} values with regular \code{NA}. } \examples{ if (require("haven")) { x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) # get current NA values x get_na(x) zap_na_tags(x) get_na(zap_na_tags(x)) # also works with non-labelled vector that have tagged NA values x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) haven::print_tagged_na(x) haven::print_tagged_na(zap_na_tags(x)) } } sjlabelled/man/as_numeric.Rd0000644000176200001440000000720413675077272015565 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as_numeric.R \name{as_numeric} \alias{as_numeric} \alias{to_numeric} \alias{as_numeric.data.frame} \title{Convert factors to numeric variables} \usage{ as_numeric(x, ...) to_numeric(x, ...) \method{as_numeric}{data.frame}(x, ..., start.at = NULL, keep.labels = TRUE, use.labels = FALSE) } \arguments{ \item{x}{A vector or data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} \item{start.at}{Starting index, i.e. the lowest numeric value of the variable's value range. By default, this argument is \code{NULL}, hence the lowest value of the returned numeric variable corresponds to the lowest factor level (if factor levels are numeric) or to \code{1} (if factor levels are not numeric).} \item{keep.labels}{Logical, if \code{TRUE}, former factor levels will be added as value labels. For numeric factor levels, values labels will be used, if present. See 'Examples' and \code{\link{set_labels}} for more details.} \item{use.labels}{Logical, if \code{TRUE} and \code{x} has numeric value labels, the values defined in the labels (right-hand side of \code{labels}, for instance \code{labels = c(null = 0, one = 1)}) will be set as numeric values (instead of consecutive factor level numbers). See 'Examples'.} } \value{ A numeric variable with values ranging either from \code{start.at} to \code{start.at} + length of factor levels, or to the corresponding factor levels (if these were numeric). If \code{x} is a data frame, the complete data frame \code{x} will be returned, where variables specified in \code{...} are coerced to numeric; if \code{...} is not specified, applies to all variables in the data frame. } \description{ This function converts (replaces) factor levels with the related factor level index number, thus the factor is converted to a numeric variable. } \examples{ data(efc) test <- as_label(efc$e42dep) table(test) table(as_numeric(test)) hist(as_numeric(test, start.at = 0)) # set lowest value of new variable to "5". table(as_numeric(test, start.at = 5)) # numeric factor keeps values dummy <- factor(c("3", "4", "6")) table(as_numeric(dummy)) # do not drop unused factor levels dummy <- ordered(c(rep("No", 5), rep("Maybe", 3)), levels = c("Yes", "No", "Maybe")) as_numeric(dummy) # non-numeric factor is converted to numeric # starting at 1 dummy <- factor(c("D", "F", "H")) table(as_numeric(dummy)) # for numeric factor levels, value labels will be used, if present dummy1 <- factor(c("3", "4", "6")) dummy1 <- set_labels(dummy1, labels = c("first", "2nd", "3rd")) dummy1 as_numeric(dummy1) # for non-numeric factor levels, these will be used. # value labels will be ignored dummy2 <- factor(c("D", "F", "H")) dummy2 <- set_labels(dummy2, labels = c("first", "2nd", "3rd")) dummy2 as_numeric(dummy2) # easily coerce specific variables in a data frame to numeric # and keep other variables, with their class preserved data(efc) efc$e42dep <- as.factor(efc$e42dep) efc$e16sex <- as.factor(efc$e16sex) efc$e17age <- as.factor(efc$e17age) # convert back "sex" and "age" into numeric head(as_numeric(efc, e16sex, e17age)) x <- factor(c("None", "Little", "Some", "Lots")) x <- set_labels(x, labels = c(None = "0.5", Little = "1.3", Some = "1.8", Lots = ".2") ) x as_numeric(x) as_numeric(x, use.labels = TRUE) as_numeric(x, use.labels = TRUE, keep.labels = FALSE) } sjlabelled/man/zap_labels.Rd0000644000176200001440000001043314046454727015547 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/drop_labels.R, R/fill_labels.R, R/zap_labels.R \name{drop_labels} \alias{drop_labels} \alias{fill_labels} \alias{zap_labels} \alias{zap_unlabelled} \title{Drop, add or convert (non-)labelled values} \usage{ drop_labels(x, ..., drop.na = TRUE) fill_labels(x, ...) zap_labels(x, ...) zap_unlabelled(x, ...) } \arguments{ \item{x}{(partially) \code{\link[haven:labelled]{labelled()}} vector or a data frame with such vectors.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} \item{drop.na}{Logical, whether existing value labels of tagged NA values (see \code{\link[haven:tagged_na]{tagged_na}}) should be removed (\code{drop.na = TRUE}, the default) or preserved (\code{drop.na = FALSE}). See \code{\link{get_na}} for more details on tagged NA values.} } \value{ \itemize{ \item For \code{zap_labels()}, \code{x}, where all labelled values are converted to \code{NA}. \item For \code{zap_unlabelled()}, \code{x}, where all non-labelled values are converted to \code{NA}. \item For \code{drop_labels()}, \code{x}, where value labels for non-existing values are removed. \item For \code{fill_labels()}, \code{x}, where labels for non-labelled values are added. } If \code{x} is a data frame, the complete data frame \code{x} will be returned, with variables specified in \code{...} being converted; if \code{...} is not specified, applies to all variables in the data frame. } \description{ For (partially) labelled vectors, \code{zap_labels()} will replace all values that have a value label attribute with \code{NA}; \code{zap_unlabelled()}, as counterpart, will replace all values that \emph{don't} have a value label attribute with \code{NA}. \cr \cr \code{drop_labels()} drops all value labels for unused values, i.e. values that are not present in a vector. \code{fill_labels()} is the counterpart to \code{drop_labels()} and adds value labels to a partially labelled vector, i.e. if not all values are labelled, non-labelled values get labels. } \examples{ if (require("sjmisc") && require("dplyr")) { # zap_labels() ---- data(efc) str(efc$e42dep) x <- set_labels( efc$e42dep, labels = c("independent" = 1, "severe dependency" = 4) ) table(x) get_values(x) str(x) # zap all labelled values table(zap_labels(x)) get_values(zap_labels(x)) str(zap_labels(x)) # zap all unlabelled values table(zap_unlabelled(x)) get_values(zap_unlabelled(x)) str(zap_unlabelled(x)) # in a pipe-workflow efc \%>\% select(c172code, e42dep) \%>\% set_labels( e42dep, labels = c("independent" = 1, "severe dependency" = 4) ) \%>\% zap_labels() # drop_labels() ---- rp <- rec_pattern(1, 100) rp # sample data data(efc) # recode carers age into groups of width 5 x <- rec(efc$c160age, rec = rp$pattern) # add value labels to new vector x <- set_labels(x, labels = rp$labels) # watch result. due to recode-pattern, we have age groups with # no observations (zero-counts) frq(x) # now, let's drop zero's frq(drop_labels(x)) # drop labels, also drop NA value labels, then also zap tagged NA if (require("haven")) { x <- labelled(c(1:3, tagged_na("z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "Unused" = 5, "Not home" = tagged_na("z"))) x drop_labels(x, drop.na = FALSE) drop_labels(x) zap_na_tags(drop_labels(x)) # fill_labels() ---- # create labelled integer, with tagged missings x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) # get current values and labels x get_labels(x) fill_labels(x) get_labels(fill_labels(x)) # same as get_labels(x, non.labelled = TRUE) } } } sjlabelled/man/as_factor.Rd0000644000176200001440000000602314046441051015356 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as_factor.R \name{as_factor} \alias{as_factor} \alias{to_factor} \alias{as_factor.data.frame} \title{Convert variable into factor and keep value labels} \usage{ as_factor(x, ...) to_factor(x, ...) \method{as_factor}{data.frame}(x, ..., add.non.labelled = FALSE) } \arguments{ \item{x}{A vector or data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} \item{add.non.labelled}{Logical, if \code{TRUE}, non-labelled values also get value labels.} } \value{ A factor, including variable and value labels. If \code{x} is a data frame, the complete data frame \code{x} will be returned, where variables specified in \code{...} are coerced to factors (including variable and value labels); if \code{...} is not specified, applies to all variables in the data frame. } \description{ This function converts a variable into a factor, but preserves variable and value label attributes. } \details{ \code{as_factor} converts numeric values into a factor with numeric levels. \code{\link{as_label}}, however, converts a vector into a factor and uses value labels as factor levels. } \note{ This function is intended for use with vectors that have value and variable label attributes. Unlike \code{\link{as.factor}}, \code{as_factor} converts a variable into a factor and preserves the value and variable label attributes. \cr \cr Adding label attributes is automatically done by importing data sets with one of the \code{read_*}-functions, like \code{\link{read_spss}}. Else, value and variable labels can be manually added to vectors with \code{\link{set_labels}} and \code{\link{set_label}}. } \examples{ if (require("sjmisc") && require("magrittr")) { data(efc) # normal factor conversion, loses value attributes x <- as.factor(efc$e42dep) frq(x) # factor conversion, which keeps value attributes x <- as_factor(efc$e42dep) frq(x) # create partially labelled vector x <- set_labels( efc$e42dep, labels = c( `1` = "independent", `4` = "severe dependency", `9` = "missing value" )) # only copy existing value labels as_factor(x) \%>\% head() get_labels(as_factor(x), values = "p") # also add labels to non-labelled values as_factor(x, add.non.labelled = TRUE) \%>\% head() get_labels(as_factor(x, add.non.labelled = TRUE), values = "p") # easily coerce specific variables in a data frame to factor # and keep other variables, with their class preserved as_factor(efc, e42dep, e16sex, c172code) \%>\% head() # use select-helpers from dplyr-package if (require("dplyr")) { as_factor(efc, contains("cop"), c161sex:c175empl) \%>\% head() } } } sjlabelled/man/set_na.Rd0000644000176200001440000001407014046443545014701 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/set_na.R \name{set_na} \alias{set_na} \title{Replace specific values in vector with NA} \usage{ set_na(x, ..., na, drop.levels = TRUE, as.tag = FALSE) } \arguments{ \item{x}{A vector or data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} \item{na}{Numeric vector with values that should be replaced with NA values, or a character vector if values of factors or character vectors should be replaced. For labelled vectors, may also be the name of a value label. In this case, the associated values for the value labels in each vector will be replaced with \code{NA}. \code{na} can also be a named vector. If \code{as.tag = FALSE}, values will be replaced only in those variables that are indicated by the value names (see 'Examples').} \item{drop.levels}{Logical, if \code{TRUE}, factor levels of values that have been replaced with \code{NA} are dropped. See 'Examples'.} \item{as.tag}{Logical, if \code{TRUE}, values in \code{x} will be replaced by \code{tagged_na}, else by usual \code{NA} values. Use a named vector to assign the value label to the tagged NA value (see 'Examples').} } \value{ \code{x}, with all values in \code{na} being replaced by \code{NA}. If \code{x} is a data frame, the complete data frame \code{x} will be returned, with NA's set for variables specified in \code{...}; if \code{...} is not specified, applies to all variables in the data frame. } \description{ This function replaces specific values of variables with \code{NA}. } \details{ \code{set_na()} converts all values defined in \code{na} with a related \code{NA} or tagged NA value (see \code{\link[haven:tagged_na]{tagged_na()}}). Tagged \code{NA}s work exactly like regular R missing values except that they store one additional byte of information: a tag, which is usually a letter ("a" to "z") or character number ("0" to "9"). \cr \cr \strong{Different NA values for different variables} \cr \cr If \code{na} is a named vector \emph{and} \code{as.tag = FALSE}, the names indicate variable names, and the associated values indicate those values that should be replaced by \code{NA} in the related variable. For instance, \code{set_na(x, na = c(v1 = 4, v2 = 3))} would replace all 4 in \code{v1} with \code{NA} and all 3 in \code{v2} with \code{NA}. \cr \cr If \code{na} is a named list \emph{and} \code{as.tag = FALSE}, it is possible to replace different multiple values by \code{NA} for different variables separately. For example, \code{set_na(x, na = list(v1 = c(1, 4), v2 = 5:7))} would replace all 1 and 4 in \code{v1} with \code{NA} and all 5 to 7 in \code{v2} with \code{NA}. \cr \cr Furthermore, see also 'Details' in \code{\link{get_na}}. } \note{ Labels from values that are replaced with NA and no longer used will be removed from \code{x}, however, other value and variable label attributes are preserved. For more details on labelled data, see vignette \href{https://cran.r-project.org/package=sjlabelled/vignettes/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}. } \examples{ if (require("sjmisc") && require("dplyr") && require("haven")) { # create random variable dummy <- sample(1:8, 100, replace = TRUE) # show value distribution table(dummy) # set value 1 and 8 as missings dummy <- set_na(dummy, na = c(1, 8)) # show value distribution, including missings table(dummy, useNA = "always") # add named vector as further missing value set_na(dummy, na = c("Refused" = 5), as.tag = TRUE) # see different missing types print_tagged_na(set_na(dummy, na = c("Refused" = 5), as.tag = TRUE)) # create sample data frame dummy <- data.frame(var1 = sample(1:8, 100, replace = TRUE), var2 = sample(1:10, 100, replace = TRUE), var3 = sample(1:6, 100, replace = TRUE)) # set value 2 and 4 as missings dummy \%>\% set_na(na = c(2, 4)) \%>\% head() dummy \%>\% set_na(na = c(2, 4), as.tag = TRUE) \%>\% get_na() dummy \%>\% set_na(na = c(2, 4), as.tag = TRUE) \%>\% get_values() data(efc) dummy <- data.frame( var1 = efc$c82cop1, var2 = efc$c83cop2, var3 = efc$c84cop3 ) # check original distribution of categories lapply(dummy, table, useNA = "always") # set 3 to NA for two variables lapply(set_na(dummy, var1, var3, na = 3), table, useNA = "always") # if 'na' is a named vector *and* 'as.tag = FALSE', different NA-values # can be specified for each variable set.seed(1) dummy <- data.frame( var1 = sample(1:8, 10, replace = TRUE), var2 = sample(1:10, 10, replace = TRUE), var3 = sample(1:6, 10, replace = TRUE) ) dummy # Replace "3" in var1 with NA, "5" in var2 and "6" in var3 set_na(dummy, na = c(var1 = 3, var2 = 5, var3 = 6)) # if 'na' is a named list *and* 'as.tag = FALSE', for each # variable different multiple NA-values can be specified set_na(dummy, na = list(var1 = 1:3, var2 = c(7, 8), var3 = 6)) # drop unused factor levels when being set to NA x <- factor(c("a", "b", "c")) x set_na(x, na = "b", as.tag = TRUE) set_na(x, na = "b", drop.levels = FALSE, as.tag = TRUE) # set_na() can also remove a missing by defining the value label # of the value that should be replaced with NA. This is in particular # helpful if a certain category should be set as NA, however, this category # is assigned with different values accross variables x1 <- sample(1:4, 20, replace = TRUE) x2 <- sample(1:7, 20, replace = TRUE) x1 <- set_labels(x1, labels = c("Refused" = 3, "No answer" = 4)) x2 <- set_labels(x2, labels = c("Refused" = 6, "No answer" = 7)) tmp <- data.frame(x1, x2) get_labels(tmp) table(tmp, useNA = "always") get_labels(set_na(tmp, na = "No answer")) table(set_na(tmp, na = "No answer"), useNA = "always") # show values tmp set_na(tmp, na = c("Refused", "No answer")) } } sjlabelled/man/label_to_colnames.Rd0000644000176200001440000000210314046415173017060 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/label_to_colnames.R \name{label_to_colnames} \alias{label_to_colnames} \title{Use variable labels as column names} \usage{ label_to_colnames(x, ...) } \arguments{ \item{x}{A data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} } \value{ \code{x} with variable labels as column names. For variables without variable labels, the column name is left unchanged. } \description{ This function sets variable labels as column names, to use "labelled data" also for those functions that cannot cope with labelled data by default. } \examples{ data(iris) iris <- var_labels( iris, Petal.Length = "Petal length (cm)", Petal.Width = "Petal width (cm)" ) colnames(iris) plot(iris) colnames(label_to_colnames(iris)) plot(label_to_colnames(iris)) } sjlabelled/man/as_label.Rd0000644000176200001440000001373013675077272015203 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as_character.R, R/as_label.R \name{as_character} \alias{as_character} \alias{to_character} \alias{as_character.data.frame} \alias{as_label} \alias{to_label} \alias{as_label.data.frame} \title{Convert variable into factor with associated value labels} \usage{ as_character(x, ...) to_character(x, ...) \method{as_character}{data.frame}( x, ..., add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, keep.labels = FALSE ) as_label(x, ...) to_label(x, ...) \method{as_label}{data.frame}( x, ..., add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, keep.labels = FALSE ) } \arguments{ \item{x}{A vector or data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} \item{add.non.labelled}{Logical, if \code{TRUE}, values without associated value label will also be converted to labels (as is). See 'Examples'.} \item{prefix}{Logical, if \code{TRUE}, the value labels used as factor levels or character values will be prefixed with their associated values. See 'Examples'.} \item{var.label}{Optional string, to set variable label attribute for the returned variable (see vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}). If \code{NULL} (default), variable label attribute of \code{x} will be used (if present). If empty, variable label attributes will be removed.} \item{drop.na}{Logical, if \code{TRUE}, tagged \code{NA} values with value labels will be converted to regular NA's. Else, tagged \code{NA} values will be replaced with their value labels. See 'Examples' and \code{\link{get_na}}.} \item{drop.levels}{Logical, if \code{TRUE}, unused factor levels will be dropped (i.e. \code{\link{droplevels}} will be applied before returning the result).} \item{keep.labels}{Logical, if \code{TRUE}, value labels are preserved This allows users to quickly convert back factors to numeric vectors with \code{as_numeric()}.} } \value{ A factor with the associated value labels as factor levels. If \code{x} is a data frame, the complete data frame \code{x} will be returned, where variables specified in \code{...} are coerced to factors; if \code{...} is not specified, applies to all variables in the data frame. \code{as_character()} returns a character vector. } \description{ \code{as_label()} converts (replaces) values of a variable (also of factors or character vectors) with their associated value labels. Might be helpful for factor variables. For instance, if you have a Gender variable with 0/1 value, and associated labels are male/female, this function would convert all 0 to male and all 1 to female and returns the new variable as factor. \code{as_character()} does the same as \code{as_label()}, but returns a character vector. } \details{ See 'Details' in \code{\link{get_na}}. } \note{ Value label attributes (see \code{\link{get_labels}}) will be removed when converting variables to factors. } \examples{ data(efc) print(get_labels(efc)['c161sex']) head(efc$c161sex) head(as_label(efc$c161sex)) print(get_labels(efc)['e42dep']) table(efc$e42dep) table(as_label(efc$e42dep)) head(efc$e42dep) head(as_label(efc$e42dep)) # structure of numeric values won't be changed # by this function, it only applies to labelled vectors # (typically categorical or factor variables) str(efc$e17age) str(as_label(efc$e17age)) # factor with non-numeric levels as_label(factor(c("a", "b", "c"))) # factor with non-numeric levels, prefixed x <- factor(c("a", "b", "c")) x <- set_labels(x, labels = c("ape", "bear", "cat")) as_label(x, prefix = TRUE) # create vector x <- c(1, 2, 3, 2, 4, NA) # add less labels than values x <- set_labels( x, labels = c("yes", "maybe", "no"), force.labels = FALSE, force.values = FALSE ) # convert to label w/o non-labelled values as_label(x) # convert to label, including non-labelled values as_label(x, add.non.labelled = TRUE) # create labelled integer, with missing flag if (require("haven")) { x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1, 2:3), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) # to labelled factor, with missing labels as_label(x, drop.na = FALSE) # to labelled factor, missings removed as_label(x, drop.na = TRUE) # keep missings, and use non-labelled values as well as_label(x, add.non.labelled = TRUE, drop.na = FALSE) } # convert labelled character to factor dummy <- c("M", "F", "F", "X") dummy <- set_labels( dummy, labels = c(`M` = "Male", `F` = "Female", `X` = "Refused") ) get_labels(dummy,, "p") as_label(dummy) # drop unused factor levels, but preserve variable label x <- factor(c("a", "b", "c"), levels = c("a", "b", "c", "d")) x <- set_labels(x, labels = c("ape", "bear", "cat")) set_label(x) <- "A factor!" x as_label(x, drop.levels = TRUE) # change variable label as_label(x, var.label = "New variable label!", drop.levels = TRUE) # convert to numeric and back again, preserving label attributes # *and* values in numeric vector x <- c(0, 1, 0, 4) x <- set_labels(x, labels = c(`null` = 0, `one` = 1, `four` = 4)) # to factor as_label(x) # to factor, back to numeric - values are 1, 2 and 3, # instead of original 0, 1 and 4 as_numeric(as_label(x)) # preserve label-attributes when converting to factor, use these attributes # to restore original numeric values when converting back to numeric as_numeric(as_label(x, keep.labels = TRUE), use.labels = TRUE) # easily coerce specific variables in a data frame to factor # and keep other variables, with their class preserved as_label(efc, e42dep, e16sex, c172code) } sjlabelled/man/get_labels.Rd0000644000176200001440000001050713675075616015541 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/get_labels.R \name{get_labels} \alias{get_labels} \title{Retrieve value labels of labelled data} \usage{ get_labels( x, attr.only = FALSE, values = NULL, non.labelled = FALSE, drop.na = TRUE, drop.unused = FALSE ) } \arguments{ \item{x}{A data frame with variables that have value label attributes (e.g. from an imported SPSS, SAS or STATA data set, via \code{\link{read_spss}}, \code{\link{read_sas}} or \code{\link{read_stata}}); a variable (vector) with value label attributes; or a \code{list} of variables with values label attributes. If \code{x} has no label attributes, factor levels are returned. See 'Examples'.} \item{attr.only}{Logical, if \code{TRUE}, labels are only searched for in the the vector's \code{attributes}; else, if \code{attr.only = FALSE} and \code{x} has no label attributes, factor levels or string values are returned. See 'Examples'.} \item{values}{String, indicating whether the values associated with the value labels are returned as well. If \code{values = "as.name"} (or \code{values = "n"}), values are set as \code{names} attribute of the returned object. If \code{values = "as.prefix"} (or \code{values = "p"}), values are included as prefix to each label. See 'Examples'.} \item{non.labelled}{Logical, if \code{TRUE}, values without labels will also be included in the returned labels (see \code{\link{fill_labels}}).} \item{drop.na}{Logical, whether labels of tagged NA values (see \code{\link[haven:tagged_na]{tagged_na()}}) should be included in the return value or not. By default, labelled (tagged) missing values are not returned. See \code{\link{get_na}} for more details on tagged NA values.} \item{drop.unused}{Logical, if \code{TRUE}, unused labels will be removed from the return value.} } \value{ Either a list with all value labels from all variables if \code{x} is a \code{data.frame} or \code{list}; a string with the value labels, if \code{x} is a variable; or \code{NULL} if no value label attribute was found. } \description{ This function returns the value labels of labelled data. } \examples{ # import SPSS data set # mydat <- read_spss("my_spss_data.sav") # retrieve variable labels # mydat.var <- get_label(mydat) # retrieve value labels # mydat.val <- get_labels(mydat) data(efc) get_labels(efc$e42dep) # simple barplot barplot(table(efc$e42dep)) # get value labels to annotate barplot barplot(table(efc$e42dep), names.arg = get_labels(efc$e42dep), main = get_label(efc$e42dep)) # include associated values get_labels(efc$e42dep, values = "as.name") # include associated values get_labels(efc$e42dep, values = "as.prefix") # get labels from multiple variables get_labels(list(efc$e42dep, efc$e16sex, efc$e15relat)) # create a dummy factor f1 <- factor(c("hi", "low", "mid")) # search for label attributes only get_labels(f1, attr.only = TRUE) # search for factor levels as well get_labels(f1) # same for character vectors c1 <- c("higher", "lower", "mid") # search for label attributes only get_labels(c1, attr.only = TRUE) # search for string values as well get_labels(c1) # create vector x <- c(1, 2, 3, 2, 4, NA) # add less labels than values x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = FALSE) # get labels for labelled values only get_labels(x) # get labels for all values get_labels(x, non.labelled = TRUE) # get labels, including tagged NA values library(haven) x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) # get current NA values x get_labels(x, values = "n", drop.na = FALSE) # create vector with unused labels data(efc) efc$e42dep <- set_labels( efc$e42dep, labels = c("independent" = 1, "dependent" = 4, "not used" = 5) ) get_labels(efc$e42dep) get_labels(efc$e42dep, drop.unused = TRUE) get_labels(efc$e42dep, non.labelled = TRUE, drop.unused = TRUE) } \seealso{ See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} for more details; \code{\link{set_labels}} to manually set value labels, \code{\link{get_label}} to get variable labels and \code{\link{get_values}} to retrieve the values associated with value labels. } sjlabelled/man/convert_case.Rd0000644000176200001440000000241713675075616016114 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/convert_case.R \name{convert_case} \alias{convert_case} \title{Generic case conversion for labels} \usage{ convert_case(lab, case = NULL, verbose = FALSE, ...) } \arguments{ \item{lab}{Character vector that should be case converted.} \item{case}{Desired target case. Labels will automatically converted into the specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for more details on this argument.} \item{verbose}{Toggle warnings and messages on or off.} \item{...}{Further arguments passed down to \code{to_any_case()}, like \code{sep_in} or \code{sep_out}.} } \value{ \code{lab}, with converted case. } \description{ This function wraps \code{to_any_case()} from the \pkg{snakecase} package with certain defaults for the \code{sep_in} and \code{sep_out} arguments, used for instance to convert cases in \code{\link{term_labels}}. } \details{ When calling \code{to_any_case()} from \pkg{snakecase}, the \code{sep_in} argument is set to \code{"(?\% get_label(e42dep, e16sex) # set default values get_label(mtcars, mpg, cyl, def.value = "no var labels") # simple barplot barplot(table(efc$e42dep)) # get value labels to annotate barplot barplot(table(efc$e42dep), names.arg = get_labels(efc$e42dep), main = get_label(efc$e42dep)) # get labels from multiple variables get_label(list(efc$e42dep, efc$e16sex, efc$e15relat)) # use case conversion for human-readable labels data(iris) get_label(iris, def.value = colnames(iris)) get_label(iris, def.value = colnames(iris), case = "parsed") } \seealso{ See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} for more details; \code{\link{set_label}} to manually set variable labels or \code{\link{get_labels}} to get value labels; \code{\link{var_labels}} to set multiple variable labels at once. } sjlabelled/man/copy_labels.Rd0000644000176200001440000000441413675077272015734 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/copy_labels.R \name{copy_labels} \alias{copy_labels} \title{Copy value and variable labels to (subsetted) data frames} \usage{ copy_labels(df_new, df_origin = NULL, ...) } \arguments{ \item{df_new}{The new, subsetted data frame.} \item{df_origin}{The original data frame where the subset (\code{df_new}) stems from; use \code{NULL}, if value and variable labels from \code{df_new} should be removed.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} } \value{ Returns \code{df_new} with either removed value and variable label attributes (if \code{df_origin = NULL}) or with copied value and variable label attributes (if \code{df_origin} was the original subsetted data frame). } \description{ Subsetting-functions usually drop value and variable labels from subsetted data frames (if the original data frame has value and variable label attributes). This function copies these value and variable labels back to subsetted data frames that have been subsetted, for instance, with \code{\link{subset}}. } \note{ In case \code{df_origin = NULL}, all possible label attributes from \code{df_new} are removed. } \examples{ data(efc) # create subset - drops label attributes efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) str(efc.sub) # copy back attributes from original dataframe efc.sub <- copy_labels(efc.sub, efc) str(efc.sub) # remove all labels efc.sub <- copy_labels(efc.sub) str(efc.sub) # create subset - drops label attributes efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) if (require("dplyr")) { # create subset with dplyr's select - attributes are preserved efc.sub2 <- select(efc, c160age, e42dep, neg_c_7, c82cop1, c84cop3) # copy labels from those columns that are available copy_labels(efc.sub, efc.sub2) \%>\% str() } # copy labels from only some columns str(copy_labels(efc.sub, efc, e42dep)) str(copy_labels(efc.sub, efc, -e17age)) } sjlabelled/man/as_labelled.Rd0000644000176200001440000000354714046411637015663 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/as_labelled.R \name{as_labelled} \alias{as_labelled} \title{Convert vector to labelled class} \usage{ as_labelled( x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE ) } \arguments{ \item{x}{Variable (vector), \code{data.frame} or \code{list} of variables that should be converted to \code{\link[haven:labelled]{labelled()}}-class objects.} \item{add.labels}{Logical, if \code{TRUE}, non-labelled values will be labelled with the corresponding value.} \item{add.class}{Logical, if \code{TRUE}, \code{x} preserves its former \code{class}-attribute and \code{labelled} is added as additional attribute. If \code{FALSE} (default), all former \code{class}-attributes will be removed and the class-attribute of \code{x} will only be \code{labelled}.} \item{skip.strings}{Logical, if \code{TRUE}, character vector are not converted into labelled-vectors. Else, character vectors are converted to factors vector and the associated values are used as value labels.} \item{tag.na}{Logical, if \code{TRUE}, tagged \code{NA} values are replaced by their associated values. This is required, for instance, when writing data back to SPSS.} } \value{ \code{x}, as \code{labelled}-class object. } \description{ Converts a (labelled) vector of any class into a \code{labelled} class vector, resp. adds a \code{labelled} class-attribute. } \examples{ data(efc) str(efc$e42dep) x <- as_labelled(efc$e42dep) str(x) x <- as_labelled(efc$e42dep, add.class = TRUE) str(x) a <- c(1, 2, 4) x <- as_labelled(a, add.class = TRUE) str(x) data(efc) x <- set_labels(efc$e42dep, labels = c(`1` = "independent", `4` = "severe dependency")) x1 <- as_labelled(x, add.labels = FALSE) x2 <- as_labelled(x, add.labels = TRUE) str(x1) str(x2) get_values(x1) get_values(x2) } sjlabelled/man/add_labels.Rd0000644000176200001440000000775113675077272015521 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/add_labels.R, R/remove_labels.R \name{add_labels} \alias{add_labels} \alias{replace_labels} \alias{remove_labels} \title{Add, replace or remove value labels of variables} \usage{ add_labels(x, ..., labels) replace_labels(x, ..., labels) remove_labels(x, ..., labels) } \arguments{ \item{x}{A vector or data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{x} is a data frame (and no vector) and only selected variables from \code{x} should be processed. You may also use functions like \code{:} or tidyselect's select-helpers. See 'Examples'.} \item{labels}{\describe{ \item{For \code{add_labels()}}{A named (numeric) vector of labels that will be added to \code{x} as label attribute.} \item{For \code{remove_labels()}}{Either a numeric vector, indicating the position of one or more label attributes that should be removed; a character vector with names of label attributes that should be removed; or a \code{\link[haven:tagged_na]{tagged_na()}} to remove the labels from specific NA values.} }} } \value{ \code{x} with additional or removed value labels. If \code{x} is a data frame, the complete data frame \code{x} will be returned, with removed or added to variables specified in \code{...}; if \code{...} is not specified, applies to all variables in the data frame. } \description{ These functions add, replace or remove value labels to or from variables. } \details{ \code{add_labels()} adds \code{labels} to the existing value labels of \code{x}, however, unlike \code{\link{set_labels}}, it does \emph{not} remove labels that were \emph{not} specified in \code{labels}. \code{add_labels()} also replaces existing value labels, but preserves the remaining labels. \cr \cr \code{remove_labels()} is the counterpart to \code{add_labels()}. It removes labels from a label attribute of \code{x}. \cr \cr \code{replace_labels()} is an alias for \code{add_labels()}. } \examples{ # add_labels() data(efc) get_labels(efc$e42dep) x <- add_labels(efc$e42dep, labels = c(`nothing` = 5)) get_labels(x) if (require("dplyr")) { x <- efc \%>\% # select three variables dplyr::select(e42dep, c172code, c161sex) \%>\% # only add new label to two of those add_labels(e42dep, c172code, labels = c(`nothing` = 5)) # see data frame, with selected variables having new labels get_labels(x) } x <- add_labels(efc$e42dep, labels = c(`nothing` = 5, `zero value` = 0)) get_labels(x, values = "p") # replace old value labels x <- add_labels( efc$e42dep, labels = c(`not so dependent` = 4, `lorem ipsum` = 5) ) get_labels(x, values = "p") # replace specific missing value (tagged NA) if (require("haven")) { x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) # get current NA values x # tagged NA(c) has currently the value label "First", will be # replaced by "Second" now. replace_labels(x, labels = c("Second" = tagged_na("c"))) } # remove_labels() x <- remove_labels(efc$e42dep, labels = 2) get_labels(x, values = "p") x <- remove_labels(efc$e42dep, labels = "independent") get_labels(x, values = "p") if (require("haven")) { x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) # get current NA values get_na(x) get_na(remove_labels(x, labels = tagged_na("c"))) } } \seealso{ \code{\link{set_label}} to manually set variable labels or \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to add value labels, replacing the existing ones (and removing non-specified value labels). } sjlabelled/man/is_labelled.Rd0000644000176200001440000000067013675074762015700 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/is_labelled.R \name{is_labelled} \alias{is_labelled} \title{Check whether object is of class "labelled"} \usage{ is_labelled(x) } \arguments{ \item{x}{An object.} } \value{ Logical, \code{TRUE} if \code{x} inherits from class \code{labelled}, \code{FALSE} otherwise. } \description{ This function checks whether \code{x} is of class \code{labelled}. } sjlabelled/DESCRIPTION0000644000176200001440000000273114046513502014064 0ustar liggesusersPackage: sjlabelled Type: Package Encoding: UTF-8 Title: Labelled Data Utility Functions Version: 1.1.8 Authors@R: c( person("Daniel", "Lüdecke", role = c("aut", "cre"), email = "d.luedecke@uke.de", comment = c(ORCID = "0000-0002-8895-3206")), person("avid", "Ranzolin", role = "ctb", email = "daranzolin@gmail.com"), person("Jonathan", "De Troye", role = "ctb", email = "detroyejr@outlook.com") ) Maintainer: Daniel Lüdecke Description: Collection of functions dealing with labelled data, like reading and writing data between R and other statistical software packages like 'SPSS', 'SAS' or 'Stata', and working with labelled data. This includes easy ways to get, set or change value and variable label attributes, to convert labelled vectors into factors or numeric (and vice versa), or to deal with multiple declared missing values. License: GPL-3 Depends: R (>= 3.4) Imports: insight (>= 0.7.0), stats, tools, utils Suggests: dplyr, haven (>= 1.1.2), magrittr, sjmisc, sjPlot, knitr, rlang, rmarkdown, snakecase, testthat URL: https://strengejacke.github.io/sjlabelled/ BugReports: https://github.com/strengejacke/sjlabelled/issues RoxygenNote: 7.1.1 VignetteBuilder: knitr NeedsCompilation: no Packaged: 2021-05-11 13:59:56 UTC; Daniel Author: Daniel Lüdecke [aut, cre] (), avid Ranzolin [ctb], Jonathan De Troye [ctb] Repository: CRAN Date/Publication: 2021-05-11 14:40:02 UTC sjlabelled/build/0000755000176200001440000000000014046506732013461 5ustar liggesuserssjlabelled/build/vignette.rds0000644000176200001440000000047014046506732016021 0ustar liggesusersR=O0uQh%PiaaB7A `C4NLmclS$=qPROLoZ'aA9dz[Ǎ|xSbJhL ^ܸJR %:Wr5{ 䚕+\3ؽW-*c-0 ,8B[VHu:[6wI1kxEzg2w_D&\.wLxܵ'<+-rB ^Mh;ђК,MA_Ksjlabelled/tests/0000755000176200001440000000000013446531213013517 5ustar liggesuserssjlabelled/tests/testthat/0000755000176200001440000000000014046513502015355 5ustar liggesuserssjlabelled/tests/testthat/test-remove_labels.R0000644000176200001440000000043313647274656021320 0ustar liggesuserslibrary(sjlabelled) test_that("remove_labels", { z <- factor(LETTERS[3:1], ordered = TRUE) z <- sjlabelled::set_labels(z, labels = c("yes", "maybe", "no")) x <- sjlabelled::remove_labels(z, labels = 2) expect_equal(attributes(x)$labels, c(yes = "A", no = "C")) }) sjlabelled/tests/testthat/test-as_numeric.R0000644000176200001440000000147413446531213020612 0ustar liggesuserscontext("sjlabelled, as_numeric") library(sjlabelled) test_that("as_numeric", { expect_equal(as_numeric(factor(c(0,1,2)), keep.labels = FALSE), c(0,1,2)) expect_equal(as_numeric(factor(c(2,3,4)), keep.labels = FALSE), c(2,3,4)) expect_equal(as_numeric(factor(c("a", "b", "c")), keep.labels = FALSE), c(1,2,3)) expect_equal(as_numeric(factor(c("d", "e", "f")), keep.labels = FALSE), c(1,2,3)) }) test_that("as_numeric", { expect_equal(as_numeric(factor(c(0,1,2)), start.at = 4, keep.labels = FALSE), c(4,5,6)) expect_equal(as_numeric(factor(c(2,3,4)), start.at = 4, keep.labels = FALSE), c(4,5,6)) expect_equal(as_numeric(factor(c("a", "b", "c")), start.at = 4, keep.labels = FALSE), c(4,5,6)) expect_equal(as_numeric(factor(c("d", "e", "f")), start.at = 4, keep.labels = FALSE), c(4,5,6)) }) sjlabelled/tests/testthat.R0000644000176200001440000000010413446531213015475 0ustar liggesuserslibrary(testthat) library(sjlabelled) test_check("sjlabelled") sjlabelled/vignettes/0000755000176200001440000000000014046506732014372 5ustar liggesuserssjlabelled/vignettes/labelleddata.Rmd0000644000176200001440000000720714046441130017430 0ustar liggesusers--- title: "Working with Labelled Data" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Working with Labelled Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` This vignette shows a small example how functions to work with labelled data can be implemented in a typical data visualization workflow. # Labelled Data In software like SPSS, it is common to have value and variable labels as variable attributes. Variable values, even if categorical, are mostly numeric. In R, however, you may use labels as values directly: ```{r} factor(c("low", "high", "mid", "high", "low")) ``` Reading SPSS-data with **haven** or **sjlabelled** keeps the numeric values for variables and adds the value and variable labels as attributes. See following example from the sample-dataset efc, which is part of the **sjlabelled**-package: ```{r} library(sjlabelled) data(efc) str(efc$e42dep) ``` While all plotting and table functions of the [sjPlot-package](https://cran.r-project.org/package=sjPlot) make use of these attributes, many packages and/or functions do not consider these attributes, e.g. R base graphics: ```{r warning=FALSE, fig.height=6, fig.width=7} library(sjlabelled) data(efc) barplot( table(efc$e42dep, efc$e16sex), beside = TRUE, legend.text = TRUE ) ``` As you can see in the above figure, the plot has neither axis nor legend labels. # Adding value labels as factor values `as_label()` is a sjlabelled-function that converts a numeric variable into a factor and sets attribute-value-labels as factor levels. When using factors with valued levels, the bar plot will be labelled. ```{r warning=FALSE, fig.height=6, fig.width=7} barplot( table(sjlabelled::as_label(efc$e42dep), sjlabelled::as_label(efc$e16sex)), beside = TRUE, legend.text = TRUE ) ``` # Getting and setting value and variable labels There are four functions that let you easily set or get value and variable labels of either a single vector or a complete data frame: * `get_label()` to get variable labels * `get_labels()` to get value labels * `set_label()` to set variable labels (add them as vector attribute) * `set_labels()` to set value labels (add them as vector attribute) With this function, you can easily add titles to plots dynamically, i.e. depending on the variable that is plotted. ```{r warning=FALSE, fig.height=6, fig.width=7} barplot( table(sjlabelled::as_label(efc$e42dep), sjlabelled::as_label(efc$e16sex)), beside = TRUE, legend.text = TRUE, main = get_label(efc$e42dep) ) ``` # Restore labels from subsetted data The base `subset()` function drops label attributes (or vector attributes in general) when subsetting data. In the sjlabelled-package, there are handy functions to deal with this problem: `copy_labels()` and `remove_labels()`. `copy_labels()` adds back labels to a subsetted data frame based on the original data frame. And `remove_labels()` removes all label attributes. ## Losing labels during subset ```{r} efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) str(efc.sub) ``` ## Add back labels ```{r, message=FALSE} efc.sub <- copy_labels(efc.sub, efc) str(efc.sub) ``` # Conclusion When working with labelled data, especially when working with data sets imported from other software packages, it comes very handy to make use of the label attributes. The **sjlabelled**-package supports this feature and offers useful functions for these tasks. sjlabelled/vignettes/quasiquotation.Rmd0000644000176200001440000001374113647275302020134 0ustar liggesusers--- title: "Using quasiquotation to add variable and value labels" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Using quasiquotation to add variable and value labels} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("rlang", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` Labelling data is typically a task for end-users and is applied in own scripts or functions rather than in packages. However, sometimes it can be useful for both end-users and package developers to have a flexible way to add variable and value labels to their data. In such cases, [quasiquotation](https://adv-r.hadley.nz/quasiquotation.html) is helpful. This vignette demonstrate how to use quasiquotation in _sjlabelled_ to label your data. ## Adding value labels to variables using quasiquotation Usually, `set_labels()` can be used to add value labels to variables. The syntax of this function is easy to use, and `set_labels()` allows to add value labels to multiple variables at once, if these variables share the same value labels. In the following examples, we will use the `frq()` function, that shows an extra **label**-column containing _value labels_, if the data is labelled. If the data has _no_ value labels, this column is not shown in the output. ```{r message=FALSE, warning=FALSE} library(sjlabelled) library(sjmisc) # for frq()-function library(rlang) # unlabelled data dummies <- data.frame( dummy1 = sample(1:3, 40, replace = TRUE), dummy2 = sample(1:3, 40, replace = TRUE), dummy3 = sample(1:3, 40, replace = TRUE) ) # set labels for all variables in the data frame test <- set_labels(dummies, labels = c("low", "mid", "hi")) attr(test$dummy1, "labels") frq(test, dummy1) # and set same value labels for two of three variables test <- set_labels( dummies, dummy1, dummy2, labels = c("low", "mid", "hi") ) frq(test) ``` `val_labels()` does the same job as `set_labels()`, but in a different way. While `set_labels()` requires variables to be specified in the `...`-argument, and labels in the `labels`-argument, `val_labels()` requires both to be specified in the `...`. `val_labels()` requires _named_ vectors as argument, with the _left-hand side_ being the name of the variable that should be labelled, and the _right-hand side_ containing the labels for the values. ```{r message=FALSE, warning=FALSE} test <- val_labels(dummies, dummy1 = c("low", "mid", "hi")) attr(test$dummy1, "labels") # remaining variables are not labelled frq(test) ``` Unlike `set_labels()`, `val_labels()` allows the user to add _different_ value labels to different variables in one function call. Another advantage, or difference, of `val_labels()` is it's flexibility in defining variable names and value labels by using quasiquotation. ### Add labels that are stored in a vector To use quasiquotation, we need the **rlang** package to be installed and loaded. Now we can have labels in a character vector, and use `!!` to unquote this vector. ```{r message=FALSE, warning=FALSE} labels <- c("low_quote", "mid_quote", "hi_quote") test <- val_labels(dummies, dummy1 = !! labels) attr(test$dummy1, "labels") ``` ### Define variable names that are stored in a vector The same can be done with the names of _variables_ that should get new value labels. We then need `!!` to unquote the variable name and `:=` as assignment. ```{r message=FALSE, warning=FALSE} variable <- "dummy2" test <- val_labels(dummies, !! variable := c("lo_var", "mid_var", "high_var")) # no value labels attr(test$dummy1, "labels") # value labels attr(test$dummy2, "labels") ``` ### Both variable names and value labels are stored in a vector Finally, we can combine the above approaches to be flexible regarding both variable names and value labels. ```{r message=FALSE, warning=FALSE} variable <- "dummy3" labels <- c("low", "mid", "hi") test <- val_labels(dummies, !! variable := !! labels) attr(test$dummy3, "labels") ``` ## Adding variable labels using quasiquotation `set_label()` is the equivalent to `set_labels()` to add variable labels to a variable. The equivalent to `val_labels()` is `var_labels()`, which works in the same way as `val_labels()`. In case of _variable_ labels, a `label`-attribute is added to a vector or factor (instead of a `labels`-attribute, which is used for _value_ labels). The following examples show how to use `var_labels()` to add variable labels to the data. We demonstrate this function without further explanation, because it is actually very similar to `val_labels()`. ```{r message=FALSE, warning=FALSE} dummy <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) # simple usage test <- var_labels(dummy, a = "first variable", c = "third variable") attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for labels v1 <- "First variable" v2 <- "Second variable" test <- var_labels(dummy, a = !! v1, b = !! v2) attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for variable names x1 <- "a" x2 <- "c" test <- var_labels(dummy, !! x1 := "First", !! x2 := "Second") attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for both variable names and labels test <- var_labels(dummy, !! x1 := !! v1, !! x2 := !! v2) attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") ``` ## Conclusion As we have demonstrated, `var_labels()` and `val_labels()` are one of the most flexible and easy-to-use ways to add value and variable labels to our data. Another advantage is the consistent design of all functions in **sjlabelled**, which allows seamless integration into pipe-workflows. sjlabelled/vignettes/intro_sjlabelled.Rmd0000644000176200001440000003404713647275404020367 0ustar liggesusers--- title: "Labelled Data and the sjlabelled-Package" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Labelled Data and the sjlabelled-Package} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("magrittr", quietly = TRUE) || !requireNamespace("dplyr", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This package provides functions to read and write data between R and other statistical software packages like _SPSS_, _SAS_ or _Stata_ and to work with labelled data; this includes easy ways to get and set label attributes, to convert labelled vectors into factors (and vice versa), or to deal with multiple declared missing values etc. This vignette gives an overview of functions to work with labelled data. # Labelled Data _Labelled data_ (or labelled vectors) is a common data structure in other statistical environments to store meta-information about variables, like variable names, value labels or multiple defined missing values. Labelled data not only extends **R**'s capabilities to deal with proper value _and_ variable labels, but also facilitates the representation of different types of missing values, like in other statistical software packages. Typically, in R, multiple declared missings cannot be represented in a similar way, like in 'SPSS' or 'SAS', with the regular missing values. However, the **haven**-package introduced `tagged_na` values, which can do this. Tagged NA's work exactly like regular R missing values except that they store one additional byte of information: a tag, which is usually a letter ("a" to "z") or also may be a character number ("0" to "9"). This allows to indicate different missings. Functions of **sjlabelled** do not necessarily require vectors of class `labelled` or `haven_labelled`. The `labelled` class, implemented by the packages **haven** and **labelled**, may cause troubles with other packages, thus it's only intended as being an intermediate data structure that should be converted to common R classes. However, coercing a `labelled` vector to other classes (like factor or numeric) typically means that meta information like value and variable label attributes are lost. Actually, there is no need to drop these attributes for non-`labelled`-class vectors. Functions like `lm()` simply copy these attributes to the data that is included in the returned object. Packages like **sjPlot** support labelled data for easily annotated data visualization. **sjlabelled** supports working with _labelled data_ and offers functions to benefit from these features. **Note:** Since package-version 2.0 of the **haven**-package, the `labelled`-class attribute was changed to `haven_labelled`, to avoid interferences with the **Hmisc**-package. ## Labelled Data in haven and labelled The **labelled**-package is intended to support `labelled` / `haven_labelled` metadata structures, thus the data structure of labelled vectors in **haven** and **labelled** is the same. Labelled data in this format stores information about value labels, variable names and multiple defined missing values. However, _variable names_ are only part of this information if data was imported with one of **haven**'s read-functions. Adding a variable label attribute is (at least up to version 1.0.0) not possible via the `labelled()`-constructor method. ```{r} library(haven) x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) print(x) ``` A `labelled` vector can either be a numeric or character vector. Conversion to factors copies the value labels as factor levels, but drops the label attributes and missing information: ```{r} is.na(x) as_factor(x) is.na(as_factor(x)) ``` ## Labelled Data in sjlabelled **sjlabelled** supports label attributes in **haven**-style (`label` and `labels`). You're not restricted to the `labelled` class for vectors when working with **sjlabelled** and labelled data. Hence, you can have vectors of common R classes and still use information like variable or value labels. ```{r message=FALSE} library(sjlabelled) # sjlabelled-sample data, an atomic vector with label attributes data(efc) str(efc$e16sex) ``` # Value Labels ## Getting value labels The `get_labels()`-method is a generic method to return value labels of a vector or data frame. ```{r} get_labels(efc$e42dep) ``` You can prefix the value labels with the associated values or return them as named vector with the `values` argument. ```{r} get_labels(efc$e42dep, values = "p") ``` `get_labels()` also returns "labels" of factors, even if the factor has no label attributes. ```{r} x <- factor(c("low", "mid", "low", "hi", "mid", "low")) get_labels(x) ``` To ensure that labels are only returned for vectors with label-attribute, use the `attr.only` argument. ```{r} x <- factor(c("low", "mid", "low", "hi", "mid", "low")) get_labels(x, attr.only = TRUE) ``` If a vector has a label attribute, only these labels are returned. Non-labelled values are excluded from the output by default... ```{r} # get labels, including tagged NA values x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) get_labels(x) ``` ... however, you can add non-labelled values to the return value as well, using the `non.labelled` argument. ```{r} get_labels(x, non.labelled = TRUE) ``` Tagged missing values can also be included in the output, using the `drop.na` argument. ```{r} get_labels(x, values = "n", drop.na = FALSE) ``` ## Getting labelled values The `get_values()` method returns the values for labelled values (i.e. values that have an associated label). We still use the vector `x` from the above examples. ```{r} print(x) get_values(x) ``` With the `drop.na` argument you can omit those values from the return values that are defined as missing. ```{r} get_values(x, drop.na = TRUE) ``` ## Setting value labels With `set_labels()` you can add label attributes to any vector. ```{r} x <- sample(1:4, 20, replace = TRUE) # return new labelled vector x <- set_labels(x, labels = c("very low", "low", "mid", "hi")) x ``` If more labels than values are given, only as many labels elements are used as values are present. ```{r} x <- c(2, 2, 3, 3, 2) x <- set_labels(x, labels = c("a", "b", "c")) x ``` However, you can force to use all labels, even for values that are not in the vector, using the `force.labels` argument. ```{r} x <- c(2, 2, 3, 3, 2) x <- set_labels( x, labels = c("a", "b", "c"), force.labels = TRUE ) x ``` For vectors with more unique values than labels, additional labels for non-labelled values are added. ```{r} x <- c(1, 2, 3, 2, 4, NA) x <- set_labels(x, labels = c("yes", "maybe", "no")) x ``` Use `force.values` to add only those labels that have been passed as argument. ```{r} x <- c(1, 2, 3, 2, 4, NA) x <- set_labels( x, labels = c("yes", "maybe", "no"), force.values = FALSE ) x ``` To add explicit labels for values (without adding more labels than wanted and without dropping labels for values that do not appear in the vector), use a named vector of labels as argument. The arguments `force.values` and `force.labels` are ignored when using named vectors. ```{r} x <- c(1, 2, 3, 2, 4, 5) x <- set_labels( x, labels = c("strongly agree" = 1, "totally disagree" = 4, "refused" = 5, "missing" = 9) ) x ``` If you want to set different value labels for a complete data frame, if you provide the labels as a `list`. For each variable in the data frame, provide a list element with value labels as character vector. Note that the length of the list must be equal to the number of variables (columns) in the data frame. ```{r} tmp <- data.frame( a = c(1, 2, 3), b = c(1, 2, 3), c = c(1, 2, 3) ) labels <- list( c("one", "two", "three"), c("eins", "zwei", "drei"), c("un", "dos", "tres") ) tmp <- set_labels(tmp, labels = labels) str(tmp) ``` You can use `set_labels()` within a pipe-workflow with _dplyr_. ```{r message=FALSE} library(dplyr) library(sjmisc) # for frq() data(efc) efc %>% select(c82cop1, c83cop2, c84cop3) %>% set_labels(labels = c("not often" = 1, "very often" = 4)) %>% frq() ``` # Variable Labels ## Getting variable labels The `get_label()`-method returns the variable label of a vector or all variable labels from a data frame. ```{r} get_label(efc$e42dep) get_label(efc, e42dep, e16sex, e15relat) ``` If a vector has no variable label, `NULL` is returned. However, `get_label()` also allows returning a standard value instead of `NULL`, in case the vector has no label attribute. This is useful to combine with `deparse(substitute())` in function calls, so - for instance - the name of the vector can be used as default value if no variable labels are present. ```{r} dummy <- c(1, 2, 3) testit <- function(x) get_label(x, def.value = deparse(substitute(x))) # returns name of vector, if it has no variable label testit(dummy) ``` If you want human-readable labels, you can use the `case`-argument, which will pass the labels to a string parser in the [snakecase-package](https://cran.r-project.org/package=snakecase). ```{r} data(iris) # returns no labels, because iris-data is not labelled get_label(iris) # returns the column name as default labels, if data is not labelled get_label(iris, def.value = colnames(iris)) # labels are parsed in a readable way get_label(iris, def.value = colnames(iris), case = "parsed") ``` ## Setting variable labels The `set_label()` function adds the variable label attribute to a vector. You can either return a new vector, or label an existing vector ```{r} x <- sample(1:4, 10, replace = TRUE) # return new vector x <- set_label(x, label = "Dummy-variable") str(x) # label existing vector set_label(x) <- "Another Dummy-variable" str(x) ``` `set_label()` can also set variable labels for a data frame. In this case, the variable attributes get an additional `name` attribute with the vector's name. This makes it easier to see which label belongs to which vector. ```{r} x <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) x <- set_label(x, label = c("Variable A", "Variable B", "Variable C")) str(x) get_label(x) ``` An alternative to `set_label()` is `var_labels()`, which also works within pipe-workflows. `var_labels()` requires named vectors as arguments to match the column names of the input, and set the associated variable labels. ```{r} x <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) library(magrittr) # for pipe x %>% var_labels( a = "Variable A", b = "Variable B", c = "Variable C" ) %>% str() ``` # Missing Values ## Defining missing values `set_na()` converts values of a vector or of multiple vectors in a data frame into `NA`s. With `as.tag = TRUE`, `set_na()` creates tagged `NA` values, which means that these missing values get an information tag and a value label (which is, by default, the former value that was converted to NA). You can either return a new vector/data frame, or set `NA`s into an existing vector/data frame. ```{r} x <- sample(1:8, 100, replace = TRUE) # show value distribution table(x) # set value 1 and 8 as tagged missings x <- set_na(x, na = c(1, 8), as.tag = TRUE) x # show value distribution, including missings table(x, useNA = "always") # now let's see, which NA's were "1" and which were "8" print_tagged_na(x) x <- factor(c("a", "b", "c")) x # set NA into existing vector x <- set_na(x, na = "b", as.tag = TRUE) x ``` ## Getting missing values The `get_na()` function returns all tagged NA values. We still use the vector `x` from the previous example. ```{r} get_na(x) ``` To see the tags of the NA values, use the `as.tag` argument. ```{r} get_na(x, as.tag = TRUE) ``` ## Replacing specific NA with values While `set_na()` allows you to replace values with (tagged) NA's, `replace_na()` (from package **sjmisc**) allows you to replace either all NA values of a vector or specific tagged NA values with a non-NA value. ```{r} library(sjmisc) # for replace_na() data(efc) str(efc$c84cop3) efc$c84cop3 <- set_na(efc$c84cop3, na = c(2, 3), as.tag = TRUE) get_na(efc$c84cop3, as.tag = TRUE) # this would replace all NA's into "2" dummy <- replace_na(efc$c84cop3, value = 2) # labels of former tagged NA's are preserved get_labels(dummy, drop.na = FALSE, values = "p") get_na(dummy, as.tag = TRUE) # No more NA values frq(dummy) # In this example, the tagged NA(2) is replaced with value 2 # the new value label for value 2 is "restored NA" dummy <- replace_na(efc$c84cop3, value = 2, na.label = "restored NA", tagged.na = "2") # Only one tagged NA remains get_labels(dummy, drop.na = FALSE, values = "p") get_na(dummy, as.tag = TRUE) # Some NA values remain frq(dummy) ``` ## Replacing values labels With `replace_labels()`, you can replace (change) value labels of labelled values. This can also be used to change the labels of tagged missing values. Make sure to know the missing tag, which can be accessed via `get_na()`. ```{r} str(efc$c82cop1) efc$c82cop1 <- set_na(efc$c82cop1, na = c(2, 3), as.tag = TRUE) get_na(efc$c82cop1, as.tag = TRUE) efc$c82cop1 <- replace_labels(efc$c82cop1, labels = c("new NA label" = tagged_na("2"))) get_na(efc$c82cop1, as.tag = TRUE) ``` sjlabelled/R/0000755000176200001440000000000014046506734012565 5ustar liggesuserssjlabelled/R/copy_labels.R0000644000176200001440000000646514046456421015214 0ustar liggesusers#' @title Copy value and variable labels to (subsetted) data frames #' @name copy_labels #' #' @description Subsetting-functions usually drop value and variable labels from #' subsetted data frames (if the original data frame has value and variable #' label attributes). This function copies these value and variable #' labels back to subsetted data frames that have been subsetted, for instance, #' with \code{\link{subset}}. #' #' @param df_new The new, subsetted data frame. #' @param df_origin The original data frame where the subset (\code{df_new}) stems from; #' use \code{NULL}, if value and variable labels from \code{df_new} should be removed. #' @inheritParams as_factor #' #' @return Returns \code{df_new} with either removed value and variable label attributes #' (if \code{df_origin = NULL}) or with copied value and variable label #' attributes (if \code{df_origin} was the original subsetted data frame). #' #' @note In case \code{df_origin = NULL}, all possible label attributes #' from \code{df_new} are removed. #' #' @examples #' data(efc) #' #' # create subset - drops label attributes #' efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) #' str(efc.sub) #' #' # copy back attributes from original dataframe #' efc.sub <- copy_labels(efc.sub, efc) #' str(efc.sub) #' #' # remove all labels #' efc.sub <- copy_labels(efc.sub) #' str(efc.sub) #' #' # create subset - drops label attributes #' efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) #' if (require("dplyr")) { #' # create subset with dplyr's select - attributes are preserved #' efc.sub2 <- select(efc, c160age, e42dep, neg_c_7, c82cop1, c84cop3) #' # copy labels from those columns that are available #' copy_labels(efc.sub, efc.sub2) %>% str() #' } #' #' # copy labels from only some columns #' str(copy_labels(efc.sub, efc, e42dep)) #' str(copy_labels(efc.sub, efc, -e17age)) #' @export copy_labels <- function(df_new, df_origin = NULL, ...) { # check if old df is NULL. if so, we remove all labels # from the data frame. if (is.null(df_origin)) { # tell user message("Removing all variable and value labels from data frame.") # remove all labels df_new <- remove_all_labels(df_new) } else { # check params if (is.data.frame(df_new) && is.data.frame(df_origin)) { # get matching colnames, because we only copy attributes from variables # that also exist in the new data frame (of course) cn <- intersect(colnames(df_new), colnames(df_origin)) dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(df_origin, dots) cn <- intersect(cn, names(.dat)) for (i in cn) { # copy variable and value labels attr(df_new[[i]], "label") <- attr(df_origin[[i]], "label", exact = TRUE) attr(df_new[[i]], "labels") <- attr(df_origin[[i]], "labels", exact = TRUE) attr(df_new[[i]], "na_values") <- attr(df_origin[[i]], "na_values", exact = TRUE) attr(df_new[[i]], "na.values") <- attr(df_origin[[i]], "na.values", exact = TRUE) } } else { warning("Both `df_origin` and `df_new` must be of class `data.frame`.", call. = FALSE) } } df_new } sjlabelled/R/as_character.R0000644000176200001440000000244213675077136015337 0ustar liggesusers#' @rdname as_label #' @export as_character <- function(x, ...) { UseMethod("as_character") } #' @rdname as_label #' @export to_character <- as_character #' @export as_character.default <- function(x, add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, ...) { as_character_helper(x, add.non.labelled, prefix, var.label, drop.na, drop.levels) } #' @rdname as_label #' @export as_character.data.frame <- function(x, ..., add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, keep.labels = FALSE) { dots <- sapply(eval(substitute(alist(...))), deparse) .dat <- .get_dot_data(x, dots) # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- as_character_helper(.dat[[i]], add.non.labelled, prefix, var.label, drop.na, drop.levels) } x } as_character_helper <- function(x, add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE) { # get variable labels vl <- get_label(x) # to character x <- as.character(as_label_helper(x, add.non.labelled, prefix, var.label, drop.na, drop.levels, keep.labels = FALSE)) # set back variable labels, if any if (!is.null(vl)) x <- set_label(x, vl) x } sjlabelled/R/set_labels.R0000644000176200001440000004417514046446767015051 0ustar liggesusers#' @title Add value labels to variables #' @name set_labels #' #' @description This function adds labels as attribute (named \code{"labels"}) #' to a variable or vector \code{x}, resp. to a set of variables in a #' data frame or a list-object. A use-case is, for instance, the #' \pkg{sjPlot}-package, which supports labelled data and automatically #' assigns labels to axes or legends in plots or to be used in tables. #' \code{val_labels()} is intended for use within pipe-workflows and has a #' tidyverse-consistent syntax, including support for quasi-quotation #' (see 'Examples'). #' #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} #' for more details; \code{\link{set_label}} to manually set variable labels or #' \code{\link{get_label}} to get variable labels; \code{\link{add_labels}} to #' add additional value labels without replacing the existing ones. #' #' @param labels (Named) character vector of labels that will be added to \code{x} as #' \code{"labels"} or \code{"value.labels"} attribute. #' \itemize{ #' \item if \code{labels} is \strong{not} a \emph{named vector}, its length must equal the value range of \code{x}, i.e. if \code{x} has values from 1 to 3, \code{labels} should have a length of 3; #' \item if length of \code{labels} is intended to differ from length of unique values of \code{x}, a warning is given. You can still add missing labels with the \code{force.labels} or \code{force.values} arguments; see 'Note'. #' \item if \code{labels} \strong{is} a \emph{named vector}, value labels will be set accordingly, even if \code{x} has a different length of unique values. See 'Note' and 'Examples'. #' \item if \code{x} is a data frame, \code{labels} may also be a \code{list} of (named) character vectors; #' \item if \code{labels} is a \code{list}, it must have the same length as number of columns of \code{x}; #' \item if \code{labels} is a vector and \code{x} is a data frame, \code{labels} will be applied to each column of \code{x}. #' } #' Use \code{labels = ""} to remove labels-attribute from \code{x}. #' @param force.labels Logical; if \code{TRUE}, all \code{labels} are added as value label #' attribute, even if \code{x} has less unique values then length of \code{labels} #' or if \code{x} has a smaller range then length of \code{labels}. See 'Examples'. #' This parameter will be ignored, if \code{labels} is a named vector. #' @param force.values Logical, if \code{TRUE} (default) and \code{labels} has less #' elements than unique values of \code{x}, additional values not covered #' by \code{labels} will be added as label as well. See 'Examples'. #' This parameter will be ignored, if \code{labels} is a named vector. #' @param drop.na Logical, whether existing value labels of tagged NA values #' (see \code{\link[haven:tagged_na]{tagged_na}}) should be removed (\code{drop.na = TRUE}, #' the default) or preserved (\code{drop.na = FALSE}). #' See \code{\link{get_na}} for more details on tagged NA values. #' @param ... For \code{set_labels()}, Optional, unquoted names of variables that should be selected for #' further processing. Required, if \code{x} is a data frame (and no #' vector) and only selected variables from \code{x} should be processed. #' You may also use functions like \code{:} or tidyselect's #' select-helpers. \cr \cr For \code{val_labels()}, #' pairs of named vectors, where the name equals the variable name, which #' should be labelled, and the value is the new variable label. \code{val_labels()} #' also supports quasi-quotation (see 'Examples'). #' #' @inheritParams add_labels #' #' @return \code{x} with value label attributes; or with removed label-attributes if #' \code{labels = ""}. If \code{x} is a data frame, the complete data #' frame \code{x} will be returned, with removed or added to variables #' specified in \code{...}; if \code{...} is not specified, applies #' to all variables in the data frame. #' #' @note \itemize{ #' \item if \code{labels} is a named vector, \code{force.labels} and \code{force.values} will be ignored, and only values defined in \code{labels} will be labelled; #' \item if \code{x} has less unique values than \code{labels}, redundant labels will be dropped, see \code{force.labels}; #' \item if \code{x} has more unique values than \code{labels}, only matching values will be labelled, other values remain unlabelled, see \code{force.values}; #' } #' If you only want to change partial value labels, use \code{\link{add_labels}} instead. #' Furthermore, see 'Note' in \code{\link{get_labels}}. #' #' @examples #' if (require("sjmisc")) { #' dummy <- sample(1:4, 40, replace = TRUE) #' frq(dummy) #' #' dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) #' frq(dummy) #' #' # assign labels with named vector #' dummy <- sample(1:4, 40, replace = TRUE) #' dummy <- set_labels(dummy, labels = c("very low" = 1, "very high" = 4)) #' frq(dummy) #' #' # force using all labels, even if not all labels #' # have associated values in vector #' x <- c(2, 2, 3, 3, 2) #' # only two value labels #' x <- set_labels(x, labels = c("1", "2", "3")) #' x #' frq(x) #' #' # all three value labels #' x <- set_labels(x, labels = c("1", "2", "3"), force.labels = TRUE) #' x #' frq(x) #' #' # create vector #' x <- c(1, 2, 3, 2, 4, NA) #' # add less labels than values #' x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = FALSE) #' x #' # add all necessary labels #' x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = TRUE) #' x #' #' # set labels and missings #' x <- c(1, 1, 1, 2, 2, -2, 3, 3, 3, 3, 3, 9) #' x <- set_labels(x, labels = c("Refused", "One", "Two", "Three", "Missing")) #' x #' set_na(x, na = c(-2, 9)) #' } #' #' #' if (require("haven") && require("sjmisc")) { #' x <- labelled( #' c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) #' ) #' # get current NA values #' x #' get_na(x) #' # lose value labels from tagged NA by default, if not specified #' set_labels(x, labels = c("New Three" = 3)) #' # do not drop na #' set_labels(x, labels = c("New Three" = 3), drop.na = FALSE) #' #' #' # set labels via named vector, #' # not using all possible values #' data(efc) #' get_labels(efc$e42dep) #' #' x <- set_labels( #' efc$e42dep, #' labels = c(`independent` = 1, #' `severe dependency` = 2, #' `missing value` = 9) #' ) #' get_labels(x, values = "p") #' get_labels(x, values = "p", non.labelled = TRUE) #' #' # labels can also be set for tagged NA value #' # create numeric vector #' x <- c(1, 2, 3, 4) #' # set 2 and 3 as missing, which will automatically set as #' # tagged NA by 'set_na()' #' x <- set_na(x, na = c(2, 3)) #' x #' # set label via named vector just for tagged NA(3) #' set_labels(x, labels = c(`New Value` = tagged_na("3"))) #' #' # setting same value labels to multiple vectors #' dummies <- data.frame( #' dummy1 = sample(1:4, 40, replace = TRUE), #' dummy2 = sample(1:4, 40, replace = TRUE), #' dummy3 = sample(1:4, 40, replace = TRUE) #' ) #' #' # and set same value labels for two of three variables #' test <- set_labels( #' dummies, dummy1, dummy2, #' labels = c("very low", "low", "mid", "hi") #' ) #' # see result... #' get_labels(test) #' } #' #' # using quasi-quotation #' if (require("rlang") && require("dplyr")) { #' dummies <- data.frame( #' dummy1 = sample(1:4, 40, replace = TRUE), #' dummy2 = sample(1:4, 40, replace = TRUE), #' dummy3 = sample(1:4, 40, replace = TRUE) #' ) #' #' x1 <- "dummy1" #' x2 <- c("so low", "rather low", "mid", "very hi") #' #' dummies %>% #' val_labels( #' !!x1 := c("really low", "low", "a bit mid", "hi"), #' dummy3 = !!x2 #' ) %>% #' get_labels() #' #' # ... and named vectors to explicitly set value labels #' x2 <- c("so low" = 4, "rather low" = 3, "mid" = 2, "very hi" = 1) #' dummies %>% #' val_labels( #' !!x1 := c("really low" = 1, "low" = 3, "a bit mid" = 2, "hi" = 4), #' dummy3 = !!x2 #' ) %>% get_labels(values = "p") #' } #' @export set_labels <- function(x, ..., labels, force.labels = FALSE, force.values = TRUE, drop.na = TRUE) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) # special handling for data frames if (is.data.frame(x)) { # check if we have one label per variable if (is.list(labels) && length(labels) == ncol(.dat)) { # get column names cn <- colnames(.dat) # iterate all columns by number for (i in seq_len(ncol(.dat))) { x[[cn[i]]] <- set_labels_helper( x = .dat[[cn[i]]], labels = labels[[i]], force.labels = force.labels, force.values = force.values, drop.na = drop.na, var.name = cn[i] ) } } else { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- set_labels_helper( x = .dat[[i]], labels = labels, force.labels = force.labels, force.values = force.values, drop.na = drop.na, var.name = i ) } } } else { x <- set_labels_helper( x = .dat, labels = labels, force.labels = force.labels, force.values = force.values, drop.na = drop.na, var.name = NULL ) } x } #' @importFrom stats na.omit set_labels_helper <- function(x, labels, force.labels, force.values, drop.na, var.name) { # any valid labels? if not, return vector if (is.null(labels) || length(labels) == 0) return(x) # valid vector? if (is.null(x)) { warning("Can't add value labels to NULL vectors", call. = FALSE) return(x) } # get labelled / tagged NAs, maybe for later use current.na <- get_na(x) # check for null if (!is.null(labels)) { # if labels is empty string, remove labels attribute if (length(labels) == 1 && nchar(labels, keepNA = FALSE) == 0) { attr(x, "labels") <- NULL # set labels for character vectors here! } else if (is.character(x)) { # string vectors can only get labels of type string if (typeof(labels) == typeof(x)) { # reverse names and labels dummy.labels <- names(labels) # but first check if we have named vector or not... if (is.null(dummy.labels)) { warning("`labels` must be a named vector.", call. = TRUE) } else { # names(dummy.labels) <- unname(labels) # attr(x, "labels") <- dummy.labels attr(x, "labels") <- labels } } else { warning("Character vectors can only get labels of same type.", call. = TRUE) } # set labels for numeric vectors or factors here } else { # determine value range vr <- get_value_range(x) # copy values to variables valrange <- vr$valrange minval <- vr$minval maxval <- vr$maxval # check for unlisting if (is.list(labels)) labels <- labels[[1]] # determine amount of labels and unique values lablen <- length(labels) values <- unique(stats::na.omit(as.vector(x))) if (.is_num_chr(values) || .is_num_fac(values)) { values <- as.numeric(values) } values <- sort(values) # set var name string if (isempty(var.name)) { name.string <- "x" } else { name.string <- var.name } # check for valid bounds of values if (is.infinite(valrange)) { warning(sprintf("Can't set value labels for \"%s\". Infinite value range.", name.string), call. = FALSE) # check if we have named vector. in this # case, just add these values } else if (!is.null(names(labels))) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # check names and value attributes. value labels # and values might be reversed if (!anyNA(suppressWarnings(as.numeric(names(labels)))) && anyNA(suppressWarnings(as.numeric(labels))) && !anyNA(suppressWarnings(as.numeric(values))) && !all(haven::is_tagged_na(labels))) { dummy.lab.values <- as.numeric(names((labels))) dummy.lab.labels <- as.character(labels) labels <- dummy.lab.values names(labels) <- dummy.lab.labels } # sort labels labels <- labels[order(labels)] # set attributes if (anyNA(suppressWarnings(as.numeric(labels)))) { # here we have also non-numeric labels, so we set # names as character string attr(x, "labels") <- labels } else { # we have only numeric labels, so we set them # as numeric values attr(x, "labels") <- as.numeric(labels) } names(attr(x, "labels")) <- as.character(names(labels)) # check for valid length of labels # if amount of labels and values are equal, # we assume matching labels } else if (length(values) == lablen) { # set attributes # check whether values is numeric, or - if character - # only has numeric character values. If yes, add values # as numeric labels-attribute if (is.numeric(values) || !anyNA(suppressWarnings(as.numeric(values)))) attr(x, "labels") <- as.numeric(values) else attr(x, "labels") <- as.character(values) # do we have an ordered factor? if (is.ordered(x)) labels <- labels[order(levels(x))] names(attr(x, "labels")) <- labels # check for valid length of labels # here, we have a smaller value range (i.e. less values) # than amount of labels } else if (valrange < lablen) { # do we want to force to set labels, even if we have more labels # than values in variable? if (force.labels) { attr(x, "labels") <- as.numeric(seq_len(lablen)) names(attr(x, "labels")) <- labels } else { # we have more labels than values, so just take as many # labes as values are present message(sprintf("More labels than values of \"%s\". Using first %i labels.", name.string, valrange)) attr(x, "labels") <- as.numeric(minval:maxval) names(attr(x, "labels")) <- labels[seq_len(valrange)] } # value range is larger than amount of labels. we may # have not continuous value range, e.g. "-2" as filter and # 1 to 4 as valid values, i.e. -1 and 0 are missing } else if (valrange > lablen) { # check if user wants to add missing values if (force.values) { # get amount of unique values valrange <- length(values) # still no match? if (valrange != lablen) { # check which one is longer, and get missing values add_values <- ifelse(valrange > lablen, valrange[-lablen], lablen[-valrange]) # add missing values to labels labels <- c(labels, as.character(add_values)) # tell user about modification message(sprintf("More values in \"%s\" than length of \"labels\". Additional values were added to labels.", name.string)) } # set attributes attr(x, "labels") <- as.numeric(seq_len(valrange)) names(attr(x, "labels")) <- labels } else { # tell user about modification message(sprintf("\"%s\" has more values than \"labels\", hence not all values are labelled.", name.string)) # drop values with no associated labels attr(x, "labels") <- as.numeric(seq_len(length(labels))) names(attr(x, "labels")) <- labels } } else { attr(x, "labels") <- as.numeric(minval:maxval) names(attr(x, "labels")) <- labels } } # keep NA's? if (!drop.na && !is.null(current.na) && length(current.na) > 0) attr(x, "labels") <- c(attr(x, "labels", exact = TRUE), current.na) } x } #' @importFrom stats na.omit get_value_range <- function(x) { # check if var is a factor if (is.factor(x)) { # check if we have numeric levels if (!is.num.fac(x)) { # retrieve levels. since levels are numeric, we # have minimum and maximum values minval <- 1 maxval <- nlevels(x) } else { # levels are not numeric. we need to convert them # first to retrieve minimum level, as numeric minval <- min(as.numeric(levels(x)), na.rm = TRUE) # check range, add minimum, so we have max maxval <- diff(range(as.numeric(levels(x)))) + minval } } else if (is.character(x)) { # if we have a character vector, we don't have # min and max values. instead, we count the # amount of unique string values minval <- 1 maxval <- length(unique(stats::na.omit(x))) } else if (all(is.na(x))) { minval <- 0 maxval <- 0 }else { # retrieve values minval <- as.numeric(min(x, na.rm = TRUE)) maxval <- as.numeric(max(x, na.rm = TRUE)) } # determine value range if (all(is.na(x))) { valrange <- 0 } else{ valrange <- maxval - minval + 1 } # return all list( minval = minval, maxval = maxval, valrange = valrange ) } sjlabelled/R/add_labels.R0000644000176200001440000001514414046443262014763 0ustar liggesusers#' @title Add, replace or remove value labels of variables #' @name add_labels #' #' @description These functions add, replace or remove value labels to or from variables. #' #' @seealso \code{\link{set_label}} to manually set variable labels or #' \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to #' add value labels, replacing the existing ones (and removing non-specified #' value labels). #' #' @param x A vector or data frame. #' @param labels \describe{ #' \item{For \code{add_labels()}}{A named (numeric) vector of labels #' that will be added to \code{x} as label attribute.} #' \item{For \code{remove_labels()}}{Either a numeric vector, indicating #' the position of one or more label attributes that should be removed; #' a character vector with names of label attributes that should be #' removed; or a \code{\link[haven:tagged_na]{tagged_na()}} to remove the labels #' from specific NA values.} #' } #' #' @inheritParams as_factor #' #' @return \code{x} with additional or removed value labels. If \code{x} #' is a data frame, the complete data frame \code{x} will be returned, #' with removed or added to variables specified in \code{...}; #' if \code{...} is not specified, applies to all variables in the #' data frame. #' #' @details \code{add_labels()} adds \code{labels} to the existing value #' labels of \code{x}, however, unlike \code{\link{set_labels}}, it #' does \emph{not} remove labels that were \emph{not} specified in #' \code{labels}. \code{add_labels()} also replaces existing #' value labels, but preserves the remaining labels. #' \cr \cr #' \code{remove_labels()} is the counterpart to \code{add_labels()}. #' It removes labels from a label attribute of \code{x}. #' \cr \cr #' \code{replace_labels()} is an alias for \code{add_labels()}. #' #' @examples #' # add_labels() #' data(efc) #' get_labels(efc$e42dep) #' #' x <- add_labels(efc$e42dep, labels = c(`nothing` = 5)) #' get_labels(x) #' #' if (require("dplyr")) { #' x <- efc %>% #' # select three variables #' dplyr::select(e42dep, c172code, c161sex) %>% #' # only add new label to two of those #' add_labels(e42dep, c172code, labels = c(`nothing` = 5)) #' # see data frame, with selected variables having new labels #' get_labels(x) #' } #' #' x <- add_labels(efc$e42dep, labels = c(`nothing` = 5, `zero value` = 0)) #' get_labels(x, values = "p") #' #' # replace old value labels #' x <- add_labels( #' efc$e42dep, #' labels = c(`not so dependent` = 4, `lorem ipsum` = 5) #' ) #' get_labels(x, values = "p") #' #' # replace specific missing value (tagged NA) #' if (require("haven")) { #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) #' # get current NA values #' x #' # tagged NA(c) has currently the value label "First", will be #' # replaced by "Second" now. #' replace_labels(x, labels = c("Second" = tagged_na("c"))) #' } #' #' #' # remove_labels() #' #' x <- remove_labels(efc$e42dep, labels = 2) #' get_labels(x, values = "p") #' #' x <- remove_labels(efc$e42dep, labels = "independent") #' get_labels(x, values = "p") #' #' if (require("haven")) { #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) #' # get current NA values #' get_na(x) #' get_na(remove_labels(x, labels = tagged_na("c"))) #' } #' @export add_labels <- function(x, ..., labels) { # check for valid value. value must be a named vector if (is.null(labels)) stop("`labels` is NULL.", call. = FALSE) if (is.null(names(labels))) stop("`labels` must be a named vector.", call. = FALSE) # evaluate arguments, generate data dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- add_labels_helper(.dat[[i]], value = labels) } } else { x <- add_labels_helper(.dat, value = labels) } x } add_labels_helper <- function(x, value) { # get current labels of `x` current.labels <- get_labels( x, attr.only = TRUE, values = "n", non.labelled = FALSE, drop.na = TRUE ) # get current NA values current.na <- get_na(x) # if we had already labels, append new ones if (!is.null(current.labels)) { # remove multiple value labels doubles <- names(current.labels) %in% as.character(value) # switch value and names attribute, since get_labels() # returns the values as names, and the value labels # as "vector content" val.switch <- as.numeric(names(current.labels)) names(val.switch) <- as.character(current.labels) # update all labels all.labels <- c(val.switch[!doubles], value) # tell user if (any(doubles)) { message(sprintf( "label '%s' was replaced with new value label.\n", current.labels[doubles] )) } } else { all.labels <- value } if (requireNamespace("haven", quietly = TRUE)) { # replace tagged NA if (any(haven::is_tagged_na(value))) { # get tagged NAs value_tag <- haven::na_tag(value)[haven::is_tagged_na(value)] cna_tag <- haven::na_tag(current.na) # find matches (replaced NA), i.e. see if 'x' has any # tagged NA values that match the tagged NA specified in 'value' doubles <- na.omit(match(value_tag, cna_tag)) # tell user if we found any tagged NA, and that these will be replaced if (any(doubles)) { message(sprintf( "tagged NA '%s' was replaced with new value label.\n", names(current.na)[doubles] )) } # remove multiple tagged NA current.na <- current.na[-doubles] } } # sort labels by values all.labels <- all.labels[order(as.numeric(all.labels))] # add NA if (!is.null(current.na)) all.labels <- c(all.labels, current.na) # set back value labels attr(x, "labels") <- all.labels x } #' @rdname add_labels #' @export replace_labels <- function(x, ..., labels) { add_labels(x = x, ..., labels = labels) } sjlabelled/R/utils_get_dots.R0000644000176200001440000000641413725170746015750 0ustar liggesusers# function to evaluate dots in a tidyselect-style and return # the variable names as character vector .get_dot_data <- function(dat, dots, verbose = TRUE) { if (!is.data.frame(dat) || length(dots) == 0) { return(dat) } columns <- colnames(dat) x <- unlist(lapply(dots, function(i) { # contains-token if (grepl("^contains\\(", i)) { pattern <- gsub("contains\\(\"(.*)\"\\)", "\\1", i) columns[string_contains(pattern, columns)] # starts-with token } else if (grepl("^starts\\(", i) || grepl("^starts_with\\(", i)) { pattern <- gsub("(.*)\\(\"(.*)\"\\)", "\\2", i) columns[string_starts_with(pattern, columns)] # ends-with token } else if (grepl("^ends\\(", i) || grepl("^ends_with\\(", i)) { pattern <- gsub("(.*)\\(\"(.*)\"\\)", "\\2", i) columns[string_ends_with(pattern, columns)] # one-of token } else if (grepl("^one_of\\(", i)) { pattern <- gsub("(\"|\\s)", "", unlist(strsplit(gsub("one_of\\(\"(.*)\"\\)", "\\1", i), ","))) columns[string_one_of(pattern, columns)] # num_range token } else if (grepl("^num_range\\(", i)) { columns[match(.get_num_range(i), columns)] # from-to token } else if (grepl(":", i, fixed = TRUE)) { tmp <- unlist(strsplit(i, ":", fixed = TRUE)) start <- if (.is_num_chr(tmp[1])) as.numeric(tmp[1]) else which(columns == tmp[1]) end <- if (.is_num_chr(tmp[2])) as.numeric(tmp[2]) else which(columns == tmp[2]) columns[start:end] # simple name } else { i } })) x <- unlist(lapply(x, function(i) { if (.is_num_chr(i)) columns[as.numeric(i)] else if (.is_num_fac(i)) columns[as.numeric(as.character(i))] else i })) not_found <- setdiff(x, columns) if (length(not_found) && isTRUE(verbose)) { insight::print_color(sprintf( "%i variables were not found in the dataset: %s\n", length(not_found), paste0(not_found, collapse = ", ") ), color = "red") } dat[, intersect(x, columns), drop = FALSE] } #' @importFrom stats na.omit .is_num_chr <- function(x) { is.character(x) && !anyNA(suppressWarnings(as.numeric(stats::na.omit(x)))) } .is_num_fac <- function(x) { is.factor(x) && !anyNA(suppressWarnings(as.numeric(levels(x)))) } .get_num_range <- function(i) { r1 <- trimws(unlist(strsplit(gsub("num_range\\((.*)\\)", "\\1", i), ","))) r2 <- gsub("\"", "", trimws(gsub("(.*)(=)(.*)", "\\3", r1)), fixed = TRUE) es <- grepl("=", r1) if (any(es)) { names(r2)[es] <- trimws(gsub("(.*)(=)(.*)", "\\1", r1[es])) } args <- c("prefix", "range", "width") if (is.null(names(r2))) { names(r2) <- args[1:length(r2)] } na_names <- which(is.na(names(r2))) if (length(na_names)) { names(r2)[na_names] <- args[na_names] } if (length(r2) > 3) { r2 <- r2[1:3] } from <- as.numeric(gsub("(\\d):(.*)", "\\1", r2["range"])) to <- as.numeric(gsub("(.*):(\\d)", "\\2", r2["range"])) width <- as.numeric(r2["width"]) if (is.na(width)) { sprintf("%s%i", r2["prefix"], from:to) } else { sprintf("%s%.*i", r2["prefix"], width, from:to) } } sjlabelled/R/get_labels.R0000644000176200001440000002416313675075312015017 0ustar liggesusers#' @title Retrieve value labels of labelled data #' @name get_labels #' #' @description This function returns the value labels of labelled data. #' #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} #' for more details; \code{\link{set_labels}} to manually set value #' labels, \code{\link{get_label}} to get variable labels and #' \code{\link{get_values}} to retrieve the values associated #' with value labels. #' #' @param x A data frame with variables that have value label attributes (e.g. #' from an imported SPSS, SAS or STATA data set, via \code{\link{read_spss}}, #' \code{\link{read_sas}} or \code{\link{read_stata}}); a variable #' (vector) with value label attributes; or a \code{list} of variables #' with values label attributes. If \code{x} has no label attributes, #' factor levels are returned. See 'Examples'. #' @param values String, indicating whether the values associated with the #' value labels are returned as well. If \code{values = "as.name"} #' (or \code{values = "n"}), values are set as \code{names} #' attribute of the returned object. If \code{values = "as.prefix"} #' (or \code{values = "p"}), values are included as prefix #' to each label. See 'Examples'. #' @param attr.only Logical, if \code{TRUE}, labels are only searched for #' in the the vector's \code{attributes}; else, if \code{attr.only = FALSE} #' and \code{x} has no label attributes, factor levels or string values #' are returned. See 'Examples'. #' @param non.labelled Logical, if \code{TRUE}, values without labels will #' also be included in the returned labels (see \code{\link{fill_labels}}). #' @param drop.na Logical, whether labels of tagged NA values (see \code{\link[haven:tagged_na]{tagged_na()}}) #' should be included in the return value or not. By default, labelled #' (tagged) missing values are not returned. See \code{\link{get_na}} #' for more details on tagged NA values. #' @param drop.unused Logical, if \code{TRUE}, unused labels will be removed from #' the return value. #' @return Either a list with all value labels from all variables if \code{x} #' is a \code{data.frame} or \code{list}; a string with the value #' labels, if \code{x} is a variable; #' or \code{NULL} if no value label attribute was found. #' #' @examples #' # import SPSS data set #' # mydat <- read_spss("my_spss_data.sav") #' #' # retrieve variable labels #' # mydat.var <- get_label(mydat) #' #' # retrieve value labels #' # mydat.val <- get_labels(mydat) #' #' data(efc) #' get_labels(efc$e42dep) #' #' # simple barplot #' barplot(table(efc$e42dep)) #' # get value labels to annotate barplot #' barplot(table(efc$e42dep), #' names.arg = get_labels(efc$e42dep), #' main = get_label(efc$e42dep)) #' #' # include associated values #' get_labels(efc$e42dep, values = "as.name") #' #' # include associated values #' get_labels(efc$e42dep, values = "as.prefix") #' #' # get labels from multiple variables #' get_labels(list(efc$e42dep, efc$e16sex, efc$e15relat)) #' #' #' # create a dummy factor #' f1 <- factor(c("hi", "low", "mid")) #' # search for label attributes only #' get_labels(f1, attr.only = TRUE) #' # search for factor levels as well #' get_labels(f1) #' #' # same for character vectors #' c1 <- c("higher", "lower", "mid") #' # search for label attributes only #' get_labels(c1, attr.only = TRUE) #' # search for string values as well #' get_labels(c1) #' #' #' # create vector #' x <- c(1, 2, 3, 2, 4, NA) #' # add less labels than values #' x <- set_labels(x, labels = c("yes", "maybe", "no"), force.values = FALSE) #' # get labels for labelled values only #' get_labels(x) #' # get labels for all values #' get_labels(x, non.labelled = TRUE) #' #' #' # get labels, including tagged NA values #' library(haven) #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) #' # get current NA values #' x #' get_labels(x, values = "n", drop.na = FALSE) #' #' #' # create vector with unused labels #' data(efc) #' efc$e42dep <- set_labels( #' efc$e42dep, #' labels = c("independent" = 1, "dependent" = 4, "not used" = 5) #' ) #' get_labels(efc$e42dep) #' get_labels(efc$e42dep, drop.unused = TRUE) #' get_labels(efc$e42dep, non.labelled = TRUE, drop.unused = TRUE) #' @export get_labels <- function(x, attr.only = FALSE, values = NULL, non.labelled = FALSE, drop.na = TRUE, drop.unused = FALSE) { UseMethod("get_labels") } #' @export get_labels.data.frame <- function(x, attr.only = FALSE, values = NULL, non.labelled = FALSE, drop.na = TRUE, drop.unused = FALSE) { lapply(x, FUN = get_labels_helper, attr.only = attr.only, include.values = values, include.non.labelled = non.labelled, drop.na = drop.na, drop.unused = drop.unused) } #' @export get_labels.list <- function(x, attr.only = FALSE, values = NULL, non.labelled = FALSE, drop.na = TRUE, drop.unused = FALSE) { lapply(x, FUN = get_labels_helper, attr.only = attr.only, include.values = values, include.non.labelled = non.labelled, drop.na = drop.na, drop.unused = drop.unused) } #' @export get_labels.default <- function(x, attr.only = FALSE, values = NULL, non.labelled = FALSE, drop.na = TRUE, drop.unused = FALSE) { get_labels_helper(x, attr.only = attr.only, include.values = values, include.non.labelled = non.labelled, drop.na = drop.na, drop.unused = drop.unused) } # Retrieve value labels of a data frame or variable # See 'get_labels' get_labels_helper <- function(x, attr.only, include.values, include.non.labelled, drop.na, drop.unused) { labels <- attr(x, "labels", exact = TRUE) add_vals <- NULL # if variable has no label attribute, use factor levels as labels if (is.null(labels)) { # only use factor level if explicitly chosen by user if (!attr.only) { # get levels of vector lv <- levels(x) # do we have any levels? if (!is.null(lv)) { labels <- lv } else if (is.character(x)) { # finally, if we even don't have values, check for # character elements labels <- unique(x) } } } else { # drop na? if (isTRUE(drop.na) && requireNamespace("haven", quietly = TRUE)) { labels <- labels[!haven::is_tagged_na(labels)] } # check if we have anything if (!is.null(labels) && length(labels) > 0) { # sort labels labels <- labels[order(labels)] # retrieve values associated with labels. for character vectors # or factors with character levels, these values are character values, # else, they are numeric values if (is.character(x) || (is.factor(x) && !is.num.fac(x))) values <- unname(labels) else values <- as.numeric(unname(labels)) # retrieve label values in correct order labels <- names(labels) # do we have any tagged NAs? If so, get tagged NAs # and annotate them properly if (requireNamespace("haven", quietly = TRUE)) { if (any(haven::is_tagged_na(values))) { values[haven::is_tagged_na(values)] <- paste0("NA(", haven::na_tag(values[haven::is_tagged_na(values)]), ")") } } # do we want to include non-labelled values as well? if yes, # find all values in variable that have no label attributes if (include.non.labelled) { # get values of variable valid.vals <- sort(unique(stats::na.omit(as.vector(x)))) # check if we have different amount values than labels # or, if we have same amount of values and labels, whether # values and labels match or not if (length(valid.vals) != length(labels) || anyNA(match(values, valid.vals))) { # We now need to know, which values of "x" don't # have labels. add_vals <- valid.vals[!valid.vals %in% values] # add to labels labels <- c(labels, as.character(add_vals)) # fix value prefix new_vals <- c(as.character(values), as.character(add_vals)) # check if values are numeric or not. if not, # make sure it's character, so we can order # consistently if (suppressWarnings(anyNA(as.numeric(values)))) orderpart <- as.character(values) else orderpart <- as.numeric(values) # sort values and labels labels <- labels[order(c(orderpart, add_vals))] new_vals <- new_vals[order(c(orderpart, add_vals))] # set back new values values <- new_vals } } # include associated values? if (!is.null(include.values)) { # for backwards compatibility, we also accept "TRUE" # here we set values as names-attribute if ((is.logical(include.values) && isTRUE(include.values)) || include.values == "as.name" || include.values == "n") { names(labels) <- values } # here we include values as prefix of labels if (include.values == "as.prefix" || include.values == "p") { if (is.numeric(values)) labels <- sprintf("[%i] %s", values, labels) else labels <- sprintf("[%s] %s", values, labels) } } } } # drop unused labels with no values in data if (drop.unused) { # get all values av <- c(get_values(x, drop.na = drop.na), add_vals) # drop unused values if (!is.null(av)) labels <- labels[sort(av) %in% names(table(x))] } # return them labels } sjlabelled/R/tidy_labels.R0000644000176200001440000000547314046440731015206 0ustar liggesusers#' @title Repair value labels #' @name tidy_labels #' #' @description Duplicated value labels in variables may cause troubles when #' saving labelled data, or computing cross tabs (cf. #' \code{sjmisc::flat_table()} or \code{sjPlot::plot_xtab()}). #' \code{tidy_labels()} repairs duplicated value labels by suffixing #' them with the associated value. #' #' @param sep String that will be used to separate the suffixed value from the #' old label when creating the new value label. #' @param remove Logical, if \code{TRUE}, the original, duplicated value label will #' be replaced by the value (i.e. the value is not the suffix of the #' value label, but will become the value label itself). The #' \code{sep}-argument will be ignored in such cases. #' #' @inheritParams add_labels #' #' @return \code{x}, with "repaired" (unique) value labels for each variable. #' #' @examples #' if (require("sjmisc")) { #' set.seed(123) #' x <- set_labels( #' sample(1:5, size = 20, replace = TRUE), #' labels = c("low" = 1, ".." = 2, ".." = 3, ".." = 4, "high" = 5) #' ) #' frq(x) #' #' z <- tidy_labels(x) #' frq(z) #' #' z <- tidy_labels(x, sep = ".") #' frq(z) #' #' z <- tidy_labels(x, remove = TRUE) #' frq(z) #' } #' @export tidy_labels <- function(x, ..., sep = "_", remove = FALSE) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- tidy_labels_helper(x = .dat[[i]], sep = sep, remove = remove) } } else { x <- tidy_labels_helper(x = .dat, sep = sep, remove = remove) } x } tidy_labels_helper <- function(x, sep, remove) { # get value labels from variable. drop unused labels labs <- get_labels( x, attr.only = TRUE, values = FALSE, drop.unused = TRUE, drop.na = TRUE ) # no labels? then return... if (is.null(labs)) return(x) # get values that are associated with labels values <- get_values(drop_labels(x), drop.na = TRUE) # create table, and check if any value label is duplicated duped.val <- names(which(table(labs) > 1)) # no dupes found? return variable then if (isempty(duped.val)) return(x) # find position of duplicated labels dupes <- lapply(duped.val, function(.x) which(labs == .x)) dupes <- as.vector(unlist(dupes)) if (remove) { # replace labels with value labs[dupes] <- sprintf("%s",values[dupes]) } else { # prefix labels with value labs[dupes] <- sprintf("%s%s%s", labs[dupes], sep, values[dupes]) } # set back value labels names(values) <- labs attr(x, "labels") <- values x }sjlabelled/R/fill_labels.R0000644000176200001440000000266214046276501015162 0ustar liggesusers#' @rdname zap_labels #' @export fill_labels <- function(x, ...) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- fill_labels_helper(.dat[[i]]) } } else { x <- fill_labels_helper(.dat) } x } fill_labels_helper <- function(x) { # get current labels current.values <- get_labels(x, attr.only = TRUE, non.labelled = FALSE) # get all labels, including non-labelled values all.values <- get_labels(x, attr.only = TRUE, values = "n", non.labelled = TRUE) # have any values? if (!is.null(all.values)) { # set back all labels, if amount of all labels differ # from the "current" values if (length(all.values) > length(current.values)) { # first, we need to switch name attribute and values all.val.switch <- as.numeric(names(all.values)) names(all.val.switch) <- as.character(all.values) # get current NA values current.na <- get_na(x) # add NA if (!is.null(current.na)) all.val.switch <- c(all.val.switch, current.na) # then set labels x <- set_labels( x, labels = all.val.switch, force.labels = TRUE, force.values = TRUE ) } } x } sjlabelled/R/remove_labels.R0000644000176200001440000000517614046276707015544 0ustar liggesusers#' @rdname add_labels #' @export remove_labels <- function(x, ..., labels) { # check for valid value. value must be a named vector if (is.null(labels)) stop("`labels` is NULL.", call. = FALSE) # if value is NA, it must be tagged na.labels <- labels[is.na(labels)] if (length(na.labels)) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } if (!all(haven::is_tagged_na(na.labels))) stop("`labels` must be a tagged NA.", call. = FALSE) } # evaluate arguments, generate data dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- remove_labels_helper(.dat[[i]], labels) } } else { x <- remove_labels_helper(.dat, labels) } x } remove_labels_helper <- function(x, labels) { # get current labels of `x` current.labels <- get_labels(x, attr.only = TRUE, values = "n", non.labelled = FALSE) # get current NA values current.na <- get_na(x) # if we have no labels, return if (is.null(current.labels) && is.null(current.na)) { message("`x` has no value labels.") return(x) } if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # remove by index? if (haven::is_tagged_na(labels[1])) { current.na <- current.na[haven::na_tag(current.na) != haven::na_tag(labels)] } else if (is.numeric(labels)) { current.labels <- current.labels[-labels] } else if (is.character(labels)) { # find value labels that should be removes removers <- as.vector(current.labels) %in% labels # remove them current.labels <- current.labels[!removers] } # switch value and names attribute, since get_labels # returns the values as names, and the value labels # as "vector content" all.labels <- names(current.labels) if (.is_num_chr(all.labels)) all.labels <- as.numeric(all.labels) names(all.labels) <- as.character(current.labels) # sort labels by values all.labels <- all.labels[order(all.labels)] # complete labels, including NA labels compl.lab <- c(all.labels, current.na) # check if any labels left after removing if (is.null(compl.lab) || isempty(compl.lab)) { # clear all labels x <- remove_all_labels(x) } else { # set back labels attr(x, "labels") <- compl.lab } x } sjlabelled/R/as_numeric.R0000644000176200001440000001471514046456757015056 0ustar liggesusers#' @title Convert factors to numeric variables #' @name as_numeric #' #' @description This function converts (replaces) factor levels with the #' related factor level index number, thus the factor is converted to #' a numeric variable. #' #' @param start.at Starting index, i.e. the lowest numeric value of the variable's #' value range. By default, this argument is \code{NULL}, hence the lowest #' value of the returned numeric variable corresponds to the lowest factor #' level (if factor levels are numeric) or to \code{1} (if factor levels #' are not numeric). #' @param keep.labels Logical, if \code{TRUE}, former factor levels will be added as #' value labels. For numeric factor levels, values labels will be used, #' if present. See 'Examples' and \code{\link{set_labels}} for more details. #' @param use.labels Logical, if \code{TRUE} and \code{x} has numeric value labels, #' the values defined in the labels (right-hand side of \code{labels}, for instance #' \code{labels = c(null = 0, one = 1)}) will be set as numeric values (instead #' of consecutive factor level numbers). See 'Examples'. #' #' @return A numeric variable with values ranging either from \code{start.at} to #' \code{start.at} + length of factor levels, or to the corresponding #' factor levels (if these were numeric). If \code{x} is a data frame, #' the complete data frame \code{x} will be returned, where variables #' specified in \code{...} are coerced to numeric; if \code{...} is #' not specified, applies to all variables in the data frame. #' #' @inheritParams add_labels #' #' @examples #' data(efc) #' test <- as_label(efc$e42dep) #' table(test) #' #' table(as_numeric(test)) #' hist(as_numeric(test, start.at = 0)) #' #' # set lowest value of new variable to "5". #' table(as_numeric(test, start.at = 5)) #' #' # numeric factor keeps values #' dummy <- factor(c("3", "4", "6")) #' table(as_numeric(dummy)) #' #' # do not drop unused factor levels #' dummy <- ordered(c(rep("No", 5), rep("Maybe", 3)), #' levels = c("Yes", "No", "Maybe")) #' as_numeric(dummy) #' #' # non-numeric factor is converted to numeric #' # starting at 1 #' dummy <- factor(c("D", "F", "H")) #' table(as_numeric(dummy)) #' #' # for numeric factor levels, value labels will be used, if present #' dummy1 <- factor(c("3", "4", "6")) #' dummy1 <- set_labels(dummy1, labels = c("first", "2nd", "3rd")) #' dummy1 #' as_numeric(dummy1) #' #' # for non-numeric factor levels, these will be used. #' # value labels will be ignored #' dummy2 <- factor(c("D", "F", "H")) #' dummy2 <- set_labels(dummy2, labels = c("first", "2nd", "3rd")) #' dummy2 #' as_numeric(dummy2) #' #' #' # easily coerce specific variables in a data frame to numeric #' # and keep other variables, with their class preserved #' data(efc) #' efc$e42dep <- as.factor(efc$e42dep) #' efc$e16sex <- as.factor(efc$e16sex) #' efc$e17age <- as.factor(efc$e17age) #' #' # convert back "sex" and "age" into numeric #' head(as_numeric(efc, e16sex, e17age)) #' #' x <- factor(c("None", "Little", "Some", "Lots")) #' x <- set_labels(x, #' labels = c(None = "0.5", Little = "1.3", Some = "1.8", Lots = ".2") #' ) #' x #' as_numeric(x) #' as_numeric(x, use.labels = TRUE) #' as_numeric(x, use.labels = TRUE, keep.labels = FALSE) #' @export as_numeric <- function(x, ...) { UseMethod("as_numeric") } #' @rdname as_numeric #' @export to_numeric <- as_numeric #' @export as_numeric.default <- function(x, start.at = NULL, keep.labels = TRUE, use.labels = FALSE, ...) { as_numeric_helper(x, start.at, keep.labels, use.labels) } #' @rdname as_numeric #' @export as_numeric.data.frame <- function(x, ..., start.at = NULL, keep.labels = TRUE, use.labels = FALSE) { dots <- sapply(eval(substitute(alist(...))), deparse) .dat <- .get_dot_data(x, dots) # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- as_numeric_helper(.dat[[i]], start.at, keep.labels, use.labels) } x } as_numeric_helper <- function(x, start.at, keep.labels, use.labels) { labels <- NULL # is already numeric? if (is.numeric(x)) return(x) # save variable label varlab <- get_label(x) # get labels labels <- get_labels(x, attr.only = TRUE, values = "n") # is character? if (is.character(x)) { # has labels? if (!is.null(labels)) { # sort labels correctly, therefor get "levels" lvls <- levels(as.factor(x)) # do we have more labels than values? If yes, drop unused labels if (length(labels) > length(lvls)) labels <- labels[names(labels) %in% lvls] # it might be that we have more levels than labels, in this case # drop unused levels - else, ordering won't work if (length(lvls) > length(labels)) lvls <- lvls[lvls %in% names(labels)] # sort labels correctly labels <- unname(labels[order(names(labels), lvls)]) } # convert to factor x <- as.factor(x) } # check if we have numeric factor levels if (is.num.fac(x)) { # retrieve "value labels" if (is.null(labels)) labels <- levels(x) # convert to numeric via as.vector new_value <- as.numeric(as.vector((x))) # new minimum value? if (!is.null(start.at) && is.numeric(start.at)) { # check if lowest value of variable differs from # requested minimum conversion value val_diff <- start.at - min(new_value, na.rm = TRUE) # adjust new_value new_value <- new_value + val_diff } } else { # use non-numeric factor levels as new labels labels <- levels(x) # check which numeric values to use. If value labels were # numeric and 'use.labels = TRUE', value labels as used # as values if (use.labels) { levels(x) <- get_values(x) } else { # check start.at value if (is.null(start.at)) start.at <- 1 # get amount of categories l <- nlevels(x) # determine highest category value end <- start.at + l - 1 # replace labels with numeric values levels(x) <- start.at:end } # convert to numeric new_value <- as.numeric(as.character(x)) } # check if we should set back former variable and value labels if (keep.labels) { new_value <- set_labels(new_value, labels = labels, force.labels = TRUE) new_value <- set_label(new_value, label = varlab) } new_value } sjlabelled/R/set_na.R0000644000176200001440000002617114046443535014167 0ustar liggesusers#' @title Replace specific values in vector with NA #' @name set_na #' #' @description This function replaces specific values of variables with \code{NA}. #' #' @param na Numeric vector with values that should be replaced with NA values, #' or a character vector if values of factors or character vectors should be #' replaced. For labelled vectors, may also be the name of a value label. In #' this case, the associated values for the value labels in each vector #' will be replaced with \code{NA}. \code{na} can also be a named vector. #' If \code{as.tag = FALSE}, values will be replaced only in those variables #' that are indicated by the value names (see 'Examples'). #' @param drop.levels Logical, if \code{TRUE}, factor levels of values that have #' been replaced with \code{NA} are dropped. See 'Examples'. #' @param as.tag Logical, if \code{TRUE}, values in \code{x} will be replaced #' by \code{tagged_na}, else by usual \code{NA} values. Use a named #' vector to assign the value label to the tagged NA value (see 'Examples'). #' #' @inheritParams as_factor #' #' @return \code{x}, with all values in \code{na} being replaced by \code{NA}. #' If \code{x} is a data frame, the complete data frame \code{x} will #' be returned, with NA's set for variables specified in \code{...}; #' if \code{...} is not specified, applies to all variables in the #' data frame. #' #' @note Labels from values that are replaced with NA and no longer used will be #' removed from \code{x}, however, other value and variable label #' attributes are preserved. For more details on labelled data, #' see vignette \href{https://cran.r-project.org/package=sjlabelled/vignettes/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}. #' #' @details \code{set_na()} converts all values defined in \code{na} with #' a related \code{NA} or tagged NA value (see \code{\link[haven:tagged_na]{tagged_na()}}). #' Tagged \code{NA}s work exactly like regular R missing values #' except that they store one additional byte of information: a tag, #' which is usually a letter ("a" to "z") or character number ("0" to "9"). #' \cr \cr #' \strong{Different NA values for different variables} #' \cr \cr #' If \code{na} is a named vector \emph{and} \code{as.tag = FALSE}, the names #' indicate variable names, and the associated values indicate those values #' that should be replaced by \code{NA} in the related variable. For instance, #' \code{set_na(x, na = c(v1 = 4, v2 = 3))} would replace all 4 in \code{v1} #' with \code{NA} and all 3 in \code{v2} with \code{NA}. #' \cr \cr #' If \code{na} is a named list \emph{and} \code{as.tag = FALSE}, it is possible #' to replace different multiple values by \code{NA} for different variables #' separately. For example, \code{set_na(x, na = list(v1 = c(1, 4), v2 = 5:7))} #' would replace all 1 and 4 in \code{v1} with \code{NA} and all 5 to 7 in #' \code{v2} with \code{NA}. #' \cr \cr #' Furthermore, see also 'Details' in \code{\link{get_na}}. #' #' @examples #' if (require("sjmisc") && require("dplyr") && require("haven")) { #' # create random variable #' dummy <- sample(1:8, 100, replace = TRUE) #' # show value distribution #' table(dummy) #' # set value 1 and 8 as missings #' dummy <- set_na(dummy, na = c(1, 8)) #' # show value distribution, including missings #' table(dummy, useNA = "always") #' #' # add named vector as further missing value #' set_na(dummy, na = c("Refused" = 5), as.tag = TRUE) #' #' # see different missing types #' print_tagged_na(set_na(dummy, na = c("Refused" = 5), as.tag = TRUE)) #' #' #' # create sample data frame #' dummy <- data.frame(var1 = sample(1:8, 100, replace = TRUE), #' var2 = sample(1:10, 100, replace = TRUE), #' var3 = sample(1:6, 100, replace = TRUE)) #' # set value 2 and 4 as missings #' dummy %>% set_na(na = c(2, 4)) %>% head() #' dummy %>% set_na(na = c(2, 4), as.tag = TRUE) %>% get_na() #' dummy %>% set_na(na = c(2, 4), as.tag = TRUE) %>% get_values() #' #' data(efc) #' dummy <- data.frame( #' var1 = efc$c82cop1, #' var2 = efc$c83cop2, #' var3 = efc$c84cop3 #' ) #' # check original distribution of categories #' lapply(dummy, table, useNA = "always") #' # set 3 to NA for two variables #' lapply(set_na(dummy, var1, var3, na = 3), table, useNA = "always") #' #' #' # if 'na' is a named vector *and* 'as.tag = FALSE', different NA-values #' # can be specified for each variable #' set.seed(1) #' dummy <- data.frame( #' var1 = sample(1:8, 10, replace = TRUE), #' var2 = sample(1:10, 10, replace = TRUE), #' var3 = sample(1:6, 10, replace = TRUE) #' ) #' dummy #' #' # Replace "3" in var1 with NA, "5" in var2 and "6" in var3 #' set_na(dummy, na = c(var1 = 3, var2 = 5, var3 = 6)) #' #' # if 'na' is a named list *and* 'as.tag = FALSE', for each #' # variable different multiple NA-values can be specified #' set_na(dummy, na = list(var1 = 1:3, var2 = c(7, 8), var3 = 6)) #' #' #' # drop unused factor levels when being set to NA #' x <- factor(c("a", "b", "c")) #' x #' set_na(x, na = "b", as.tag = TRUE) #' set_na(x, na = "b", drop.levels = FALSE, as.tag = TRUE) #' #' # set_na() can also remove a missing by defining the value label #' # of the value that should be replaced with NA. This is in particular #' # helpful if a certain category should be set as NA, however, this category #' # is assigned with different values accross variables #' x1 <- sample(1:4, 20, replace = TRUE) #' x2 <- sample(1:7, 20, replace = TRUE) #' x1 <- set_labels(x1, labels = c("Refused" = 3, "No answer" = 4)) #' x2 <- set_labels(x2, labels = c("Refused" = 6, "No answer" = 7)) #' #' tmp <- data.frame(x1, x2) #' get_labels(tmp) #' table(tmp, useNA = "always") #' #' get_labels(set_na(tmp, na = "No answer")) #' table(set_na(tmp, na = "No answer"), useNA = "always") #' #' # show values #' tmp #' set_na(tmp, na = c("Refused", "No answer")) #' } #' @export set_na <- function(x, ..., na, drop.levels = TRUE, as.tag = FALSE) { # check for valid value if (is.null(na) || anyNA(na)) { warning("`na` is not allowed to be `NULL` or to contain `NA`-values.", call. = FALSE) return(x) } # evaluate arguments, generate data dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- set_na_helper( x = .dat[[i]], value = na, drop.levels = drop.levels, as.tag = as.tag, var.name = i ) } } else { x <- set_na_helper( x = .dat, value = na, drop.levels = drop.levels, as.tag = as.tag, var.name = NULL ) } x } #' @importFrom stats na.omit set_na_helper <- function(x, value, drop.levels, as.tag, var.name) { # check if values has only NA's if (sum(is.na(x)) == length(x)) return(x) if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } if (!requireNamespace("sjmisc", quietly = TRUE)) { stop("Package 'sjmisc' required for this function. Please install it.") } if (is.list(value)) { lnames <- unname(unlist(mapply(function(.x, .y) rep(.y, length(.x)), value, names(value), SIMPLIFY = FALSE))) value <- unlist(value) names(value) <- lnames } # check if value is a named vector na.names <- names(value) # get values for value labels lab.values <- get_values(x, drop.na = FALSE) # no tagged NA's for date values if (inherits(x, "Date")) as.tag <- FALSE # get value labels val.lab <- attr(x, "labels", exact = TRUE) val.lab <- val.lab[!haven::is_tagged_na(val.lab)] # if value is a character vector, user may have defined a value label. # find value of associated label then if (is.character(value)) { # get value labels that match the values which should be set to NA val.match <- val.lab[names(val.lab) %in% value] # now get values for this vector if (!sjmisc::is_empty(val.match) && !sjmisc::is_empty(names(val.match))) { # should be numeric, else we might have a factor na.values <- suppressWarnings(as.numeric(val.match)) # if we have no NA, coercing to numeric worked. Now get these # NA values and remove value labels from vector if (!anyNA(na.values)) { x <- suppressWarnings(remove_labels(x, labels = value)) value <- na.values } } } # haven::na_tag works only for double if (is.double(x) && as.tag) { # get na-tags, to check whether NA already was defined nat <- as.vector(stats::na.omit(haven::na_tag(x))) # stop if user wants to assign a value to NA that is # already assigned as NA if (any(nat %in% as.character(value))) stop("Can't set NA values. At least one element of `value` is already defined as NA. Use `zap_na_tags()` to remove tags from NA values.", call. = FALSE) } # iterate all NAs for (i in seq_len(length(value))) { if (as.tag) { # find associated values in x and set them as tagged NA x[x %in% value[i]] <- haven::tagged_na(as.character(value[i])) # is na-value in labelled values? lv <- which(lab.values == value[i]) # if yes, replace label if (!sjmisc::is_empty(lv)) { # for tagged NA, use tag as new attribute # change value attr(x, "labels")[lv] <- haven::tagged_na(as.character(value[i])) # change label as well? if (!is.null(na.names)) names(attr(x, "labels"))[lv] <- na.names[i] } else { # get labels and label values lv <- attr(x, "labels", exact = TRUE) ln <- names(attr(x, "labels", exact = TRUE)) # add NA attr(x, "labels") <- c(lv, haven::tagged_na(as.character(value[i]))) if (!is.null(na.names)) names(attr(x, "labels")) <- c(ln, na.names[i]) else names(attr(x, "labels")) <- c(ln, as.character(value[i])) } } else { if (!is.null(na.names) && !is.null(var.name)) { if (na.names[i] == var.name) x[x %in% value[i]] <- NA } else { # find associated values in x and set them as tagged NA x[x %in% value[i]] <- NA } } } # remove unused value labels removers <- which(get_values(x) %in% value) if (!is.null(removers) && !sjmisc::is_empty(removers, first.only = TRUE)) { attr(x, "labels") <- val.lab[-removers] } # if we have a factor, check if we have unused levels now due to NA # assignment. If yes, drop levels if (is.factor(x) && drop.levels && length(levels(x)) != length(levels(droplevels(x)))) { # save value and variable labels keep.val <- attr(x, "labels", exact = TRUE) keep.var <- attr(x, "label", exact = TRUE) # drop levels x <- droplevels(x) # set back labels attr(x, "labels") <- keep.val attr(x, "label") <- keep.var } x } sjlabelled/R/read.R0000644000176200001440000002443314046276551013632 0ustar liggesusers#' @title Import data from other statistical software packages #' @name read_spss #' #' @description Import data from SPSS, SAS or Stata, including NA's, value and variable #' labels. #' #' @seealso Vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}. #' #' @param path File path to the data file. #' @param atomic.to.fac Logical, if \code{TRUE}, categorical variables imported #' from the dataset (which are imported as \code{atomic}) will be #' converted to factors. #' @param drop.labels Logical, if \code{TRUE}, unused value labels are removed. See #' \code{\link{drop_labels}}. #' @param tag.na Logical, if \code{TRUE}, missing values are imported #' as \code{\link[haven:tagged_na]{tagged_na}} values; else, missing values are #' converted to regular \code{NA} (default behaviour). #' @param path.cat Optional, the file path to the SAS catalog file. #' @param enc The character encoding used for the file. This defaults to the encoding #' specified in the file, or UTF-8. Use this argument to override the default #' encoding stored in the file. #' @param verbose Logical, if \code{TRUE}, a progress bar is displayed that indicates #' the progress of converting the imported data. #' #' @return A data frame containing the imported, labelled data. Retrieve value labels with #' \code{\link{get_labels}} and variable labels with \code{\link{get_label}}. #' #' @note These are wrapper functions for \CRANpkg{haven}'s \code{read_*}-functions. #' #' @details These read-functions behave slightly differently from \pkg{haven}'s #' read-functions: #' \itemize{ #' \item The vectors in the returned data frame are of class \code{atomic}, not of class \code{labelled}. The labelled-class might cause issues with other packages. #' \item When importing SPSS data, variables with user defined missings \emph{won't} be read into \code{labelled_spss} objects, but imported as \emph{tagged NA values}. #' } #' The \code{atomic.to.fac} option only #' converts those variables into factors that are of class \code{atomic} and #' which have value labels after import. Atomic vectors without value labels #' are considered as continuous and not converted to factors. #' #' @examples #' \dontrun{ #' # import SPSS data set. uses haven's read function #' mydat <- read_spss("my_spss_data.sav") #' #' # use haven's read function, convert atomic to factor #' mydat <- read_spss("my_spss_data.sav", atomic.to.fac = TRUE) #' #' # retrieve variable labels #' mydat.var <- get_label(mydat) #' #' # retrieve value labels #' mydat.val <- get_labels(mydat)} #' @export read_spss <- function(path, atomic.to.fac = FALSE, drop.labels = FALSE, tag.na = FALSE, enc = NULL, verbose = FALSE) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # read data file data.spss <- haven::read_sav(file = path, encoding = enc, user_na = tag.na) # prepare tagged NA? if (tag.na) { # remember all-NA values all_missings <- c() # convert NA for all variables for (i in seq_len(ncol(data.spss))) { # get variable x <- data.spss[[i]] # has variable ONLY missings? if (all(is.na(x))) { all_missings <- c(all_missings, i) } else { # get NA values na.values <- attr(x, "na_values", exact = TRUE) na.range <- attr(x, "na_range", exact = TRUE) # has any NA values? if (!is.null(na.values)) { # get label attr labels <- attr(x, "labels", exact = TRUE) # create tagged NA char.na.values <- as.character(na.values) if (is.numeric(na.values)) { negative.values <- which(na.values < 1) char.na.values[negative.values] <- letters[abs(na.values[negative.values])] } tna <- haven::tagged_na(char.na.values) # replace values with tagged NA for (j in seq_len(length(na.values))) { x[x == na.values[j]] <- tna[j] } # do we have any labels? if (!is.null(labels)) { # get missing labels na.val.labels <- names(labels)[labels %in% na.values] # do we have any labels for missings? then name tagged # NA with value labels, else use values as labels empty_val_labels <- isempty(na.val.labels) if (length(na.val.labels) > 0 && !empty_val_labels) names(tna) <- na.val.labels else names(tna) <- na.values # add/replace value labeld for tagged NA labels <- c(labels[!labels %in% na.values], tna) } else { # use values as names, if we don't have value labels names(tna) <- na.values labels <- tna } # set back attribute attr(x, "labels") <- labels } # do we have NA range? if (!is.null(na.range)) { # check if any of the missing range values actually exists in data min.range.start <- min(na.range[!is.infinite(na.range)], na.rm = TRUE) max.range.end <- max(na.range[!is.infinite(na.range)], na.rm = TRUE) # we start with range up to highest value if (any(na.range == Inf) && min.range.start <= max(x, na.rm = TRUE)) { x <- set_na(x, na = sort(stats::na.omit(unique(x[x >= min.range.start]))), as.tag = TRUE) } # we start with range up to highest value if (any(na.range == -Inf) && max.range.end >= min(x, na.rm = TRUE)) { x <- set_na(x, na = sort(stats::na.omit(unique(x[x <= max.range.end]))), as.tag = TRUE) } # here we have no infinite value range if (!any(is.infinite(na.range))) { x <- set_na(x, na = sort(stats::na.omit(unique(c( na.range[!is.infinite(na.range)], x[x >= min.range.start & x <= max.range.end] )))), as.tag = TRUE) } } # finally, copy x back to data frame if (!is.null(na.range) || !is.null(na.values)) { data.spss[[i]] <- x attr(data.spss[[i]], "na.values") <- na.values attr(data.spss[[i]], "na.range") <- na.range } } } # do we have any "all-missing-variables"? if (!isempty(all_missings)) { message(sprintf("Following %i variables have only missing values:", length(all_missings))) cat(paste(all_missings, collapse = ", ")) cat("\n") } } .read_postprocessing(data.spss, atomic.to.fac, drop.labels, verbose) } #' @rdname read_spss #' @export read_sas <- function(path, path.cat = NULL, atomic.to.fac = FALSE, drop.labels = FALSE, enc = NULL, verbose = FALSE) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # read data file data <- haven::read_sas(data_file = path, catalog_file = path.cat, encoding = enc) # find all-NA values len <- nrow(data) all_missings <- names(which(unlist(lapply(data, function(x) sum(is.na(x)) == len)) == TRUE)) # do we have any "all-missing-variables"? if (!isempty(all_missings)) { message(sprintf("Following %i variables have only missing values:", length(all_missings))) cat(paste(all_missings, collapse = ", ")) cat("\n") } .read_postprocessing(data, atomic.to.fac, drop.labels, verbose) } #' @rdname read_spss #' @export read_stata <- function(path, atomic.to.fac = FALSE, drop.labels = FALSE, enc = NULL, verbose = FALSE) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # read data file data <- haven::read_dta(file = path, encoding = enc) # find all-NA values len <- nrow(data) all_missings <- names(which(unlist(lapply(data, function(x) sum(is.na(x)) == len)) == TRUE)) # do we have any "all-missing-variables"? if (!isempty(all_missings)) { message(sprintf("Following %i variables have only missing values:", length(all_missings))) cat(paste(all_missings, collapse = ", ")) cat("\n") } .read_postprocessing(data, atomic.to.fac, drop.labels, verbose) } .read_postprocessing <- function(data, atomic.to.fac, drop.labels, verbose) { # remove label attributes d <- unlabel(data, verbose = verbose) # drop unused labels if (drop.labels) d <- drop_labels(d) # convert atomic values to factors if (atomic.to.fac) d <- .atomic_to_fac(d) # return data frame d } # converts atomic numeric vectors into factors with # numerical factor levels .atomic_to_fac <- function(d) { # tell user... message("Converting atomic to factors. Please wait...\n") # iterate all columns as.data.frame(lapply( d, function(x) { # capture value labels attribute first labs <- attr(x, "labels", exact = TRUE) # and save variable label, if any lab <- attr(x, "label", exact = TRUE) # is atomic, which was factor in SPSS? if (is.atomic(x) && !is.null(labs) && length(labs) >= length(unique(stats::na.omit(x)))) { # so we have value labels (only typical for factors, not # continuous variables) and a variable of type "atomic" (SPSS # continuous variables would be imported as numeric) - this # indicates we have a factor variable. now we convert to # factor x <- as.factor(x) # set back labels attribute attr(x, "labels") <- labs # any variable label? if (!is.null(lab)) attr(x, "label") <- lab } x }), stringsAsFactors = FALSE) } #' @importFrom tools file_ext #' @rdname read_spss #' @export read_data <- function(path, atomic.to.fac = FALSE, drop.labels = FALSE, enc = NULL, verbose = FALSE) { switch( tools::file_ext(path), "sav" = , "por" = read_spss(path = path, atomic.to.fac = atomic.to.fac, drop.labels = drop.labels, enc = enc, verbose = verbose), "dta" = read_stata(path = path, atomic.to.fac = atomic.to.fac, drop.labels = drop.labels, enc = enc, verbose = verbose), read_sas(path = path, atomic.to.fac = atomic.to.fac, drop.labels = drop.labels, enc = enc, verbose = verbose) ) }sjlabelled/R/val_labels.R0000644000176200001440000000226414046276730015020 0ustar liggesusers#' @rdname set_labels #' @export val_labels <- function(x, ..., force.labels = FALSE, force.values = TRUE, drop.na = TRUE) { if (!requireNamespace("rlang", quietly = TRUE)) { stop("Package 'rlang' required for this function to work. Please install it.") } # get dots .dots <- rlang::enexprs(...) labels <- lapply(.dots, function(i) if (is.language(i)) eval(i) else i) # select variables vars <- names(labels) # non-matching column names non.vars <- which(!(vars %in% colnames(x))) # check if all variables exist in data frame if (!isempty(non.vars)) { # tell user warning(sprintf( "Following elements are no valid column names in `x`: %s", paste(vars[non.vars], collapse = ",") ), call. = FALSE) # remove invalid names vars <- vars[-non.vars] labels <- labels[-non.vars] } # set label for all variables for (i in seq_len(length(vars))) { x[[vars[i]]] <- set_labels_helper( x = x[[vars[i]]], labels = labels[[i]], force.labels = force.labels, force.values = force.values, drop.na = drop.na, var.name = vars[i] ) } # return data x } sjlabelled/R/var_labels.R0000644000176200001440000000212614046276735015030 0ustar liggesusers#' @rdname set_label #' @export var_labels <- function(x, ...) { # get dots .dots <- match.call(expand.dots = FALSE)$`...` if (inherits(.dots, "pairlist")) { if (!requireNamespace("rlang", quietly = TRUE)) { stop("Package 'rlang' required for this function to work. Please install it.") } .dots <- unlist(lapply(rlang::ensyms(...), rlang::as_string)) } else { .dots <- unlist(.dots) } # select variables vars <- names(.dots) # get new labels labels <- unname(.dots) # non-matching column names non.vars <- which(!(vars %in% colnames(x))) # check if all variables exist in data frame if (!isempty(non.vars)) { # tell user warning(sprintf( "Following elements are no valid column names in `x`: %s", paste(vars[non.vars], collapse = ",") ), call. = FALSE) # remove invalid names vars <- vars[-non.vars] labels <- labels[-non.vars] } # set label for all variables for (i in seq_len(length(vars))) { attr(x[[vars[i]]], "label") <- labels[i] } # return data x } sjlabelled/R/efc.R0000644000176200001440000000104513446531213013436 0ustar liggesusers#' @docType data #' @title Sample dataset from the EUROFAMCARE project #' @name efc #' @keywords data #' #' @description A SPSS sample data set, imported with the \code{\link{read_spss}} function. #' #' @examples #' # Attach EFC-data #' data(efc) #' #' # Show structure #' str(efc) #' #' # show first rows #' head(efc) #' #' # show variables #' \dontrun{ #' library(sjPlot) #' view_df(efc) #' #' # show variable labels #' get_label(efc) #' #' # plot efc-data frame summary #' sjt.df(efc, altr.row.col = TRUE)} #' NULL sjlabelled/R/write.R0000644000176200001440000000520214046440101014022 0ustar liggesusers#' @title Write data to other statistical software packages #' @name write_spss #' #' @description These functions write the content of a data frame to an SPSS, SAS or #' Stata-file. #' #' @param x A data frame that should be saved as file. #' @param path File path of the output file. #' @param version File version to use. Supports versions 8-14. #' @param drop.na Logical, if \code{TRUE}, tagged \code{NA} values with value labels #' will be converted to regular NA's. Else, tagged \code{NA} values will be replaced #' with their value labels. See 'Examples' and \code{\link{get_na}}. #' @param compress Logical, if \code{TRUE} and a SPSS-file should be created, #' saves \code{x} in \code{zsav} (i.e. compressed SPSS) format. #' #' @export write_spss <- function(x, path, drop.na = FALSE, compress = FALSE) { .write_data(x = x, path = path, type = "spss", version = 14, drop.na = drop.na, compress = compress) } #' @rdname write_spss #' @export write_stata <- function(x, path, drop.na = FALSE, version = 14) { .write_data(x = x, path = path, type = "stata", version = version, drop.na = drop.na) } #' @rdname write_spss #' @export write_sas <- function(x, path, drop.na = FALSE) { .write_data(x = x, path = path, type = "sas", version = 14, drop.na = drop.na) } .write_data <- function(x, path, type, version, drop.na, compress = FALSE) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # make sure to have tidy labels message("Tidying value labels. Please wait...") x <- tidy_labels(x) # convert data to labelled # x <- as_label(x, add.non.labelled = TRUE, drop.na = drop.na) x <- as_labelled(x, add.labels = TRUE, skip.strings = TRUE, add.class = TRUE, tag.na = TRUE) # check for correct column names for (i in seq_len(ncol(x))) { # check column name end.point <- colnames(x)[i] # if it ends with a dot, add a char. dot is invalid last char for SPSS if (substr(end.point, nchar(end.point), nchar(end.point)) == ".") { colnames(x)[i] <- paste0(end.point, i) } } # tell user message(sprintf("Writing %s file to '%s'. Please wait...", type, path)) if (tolower(tools::file_ext(path)) == "zsav") { compress <- TRUE } if (type == "spss") { # write SPSS haven::write_sav(data = x, path = path, compress = compress) } else if (type == "stata") { # write Stata haven::write_dta(data = x, path = path, version = version) } else if (type == "sas") { # write Stata haven::write_sas(data = x, path = path) } } sjlabelled/R/get_na.R0000644000176200001440000000676314046440471014154 0ustar liggesusers#' @title Retrieve tagged NA values of labelled variables #' @name get_na #' #' @description This function retrieves tagged NA values and their associated #' value labels from a labelled vector. #' #' @param x Variable (vector) with value label attributes, including #' tagged missing values (see \code{\link[haven:tagged_na]{tagged_na()}}); #' or a data frame or list with such variables. #' @param as.tag Logical, if \code{TRUE}, the returned values are not tagged NA's, #' but their string representative including the tag value. See 'Examples'. #' @return The tagged missing values and their associated value labels from \code{x}, #' or \code{NULL} if \code{x} has no tagged missing values. #' #' @details Other statistical software packages (like 'SPSS' or 'SAS') allow to define #' multiple missing values, e.g. \emph{not applicable}, \emph{refused answer} #' or "real" missing. These missing types may be assigned with #' different values, so it is possible to distinguish between these #' missing types. In R, multiple declared missings cannot be represented #' in a similar way with the regular missing values. However, #' \code{tagged_na()} values can do this. #' Tagged \code{NA}s work exactly like regular R missing values #' except that they store one additional byte of information: a tag, #' which is usually a letter ("a" to "z") or character number ("0" to "9"). #' This allows to indicate different missings. #' \cr \cr #' Furthermore, see 'Details' in \code{\link{get_values}}. #' #' @examples #' library(haven) #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) #' # get current NA values #' x #' get_na(x) #' # which NA has which tag? #' get_na(x, as.tag = TRUE) #' #' # replace only the NA, which is tagged as NA(c) #' if (require("sjmisc")) { #' replace_na(x, value = 2, tagged.na = "c") #' get_na(replace_na(x, value = 2, tagged.na = "c")) #' #' # data frame as input #' y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) #' get_na(data.frame(x, y)) #' } #' @export get_na <- function(x, as.tag = FALSE) { UseMethod("get_na") } #' @export get_na.data.frame <- function(x, as.tag = FALSE) { lapply(x, FUN = get_na_helper, as.tag) } #' @export get_na.list <- function(x, as.tag = FALSE) { lapply(x, FUN = get_na_helper, as.tag) } #' @export get_na.default <- function(x, as.tag = FALSE) { get_na_helper(x, as.tag) } get_na_helper <- function(x, as.tag) { # get values values <- attr(x, "labels", exact = TRUE) # any labelled? if (is.null(values)) return(NULL) if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # get NA nas <- values[haven::is_tagged_na(values)] # if we have no *tagged* NA, return NULL if (length(nas) == 0) nas <- NULL # print as tag? if (as.tag && !is.null(nas)) { # save names nn <- names(nas) # make character vector with NA tags nas <- paste0("NA(", haven::na_tag(nas), ")") # set back names names(nas) <- nn } # return missing values nas } sjlabelled/R/get_label.R0000644000176200001440000001036514046276513014632 0ustar liggesusers#' @title Retrieve variable label(s) of labelled data #' @name get_label #' #' @description This function returns the variable labels of labelled data. #' #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} #' for more details; \code{\link{set_label}} to manually set variable labels or \code{\link{get_labels}} #' to get value labels; \code{\link{var_labels}} to set multiple variable #' labels at once. #' @param x A data frame with variables that have label attributes (e.g. #' from an imported SPSS, SAS or STATA data set, via \code{\link{read_spss}}, #' \code{\link{read_sas}} or \code{\link{read_stata}}); a variable #' (vector) with variable label attribute; or a \code{list} of variables #' with variable label attributes. See 'Examples'. #' @param ... Optional, names of variables, where labels should be retrieved. #' Required, if either data is a data frame and no vector, or if only #' selected variables from \code{x} should be used in the function. #' Convenient argument to work with pipe-chains (see 'Examples'). #' @param def.value Optional, a character string which will be returned as label #' if \code{x} has no label attribute. By default, \code{NULL} is returned. #' #' @inheritParams term_labels #' #' @return A named character vector with all variable labels from the data frame or list; #' or a simple character vector (of length 1) with the variable label, if \code{x} is a variable. #' If \code{x} is a single vector and has no label attribute, the value #' of \code{def.value} will be returned (which is by default \code{NULL}). #' #' @note \code{\link{var_labels}} is an alternative way to set variable labels, #' which follows the philosophy of tidyvers API design (data as first argument, #' dots as value pairs indicating variables) #' #' @examples #' # import SPSS data set #' # mydat <- read_spss("my_spss_data.sav", enc="UTF-8") #' #' # retrieve variable labels #' # mydat.var <- get_label(mydat) #' #' # retrieve value labels #' # mydat.val <- get_labels(mydat) #' #' data(efc) #' #' # get variable lable #' get_label(efc$e42dep) #' #' # alternative way #' get_label(efc)["e42dep"] #' #' # 'get_label()' also works within pipe-chains #' library(magrittr) #' efc %>% get_label(e42dep, e16sex) #' #' # set default values #' get_label(mtcars, mpg, cyl, def.value = "no var labels") #' #' # simple barplot #' barplot(table(efc$e42dep)) #' # get value labels to annotate barplot #' barplot(table(efc$e42dep), #' names.arg = get_labels(efc$e42dep), #' main = get_label(efc$e42dep)) #' #' # get labels from multiple variables #' get_label(list(efc$e42dep, efc$e16sex, efc$e15relat)) #' #' # use case conversion for human-readable labels #' data(iris) #' get_label(iris, def.value = colnames(iris)) #' get_label(iris, def.value = colnames(iris), case = "parsed") #' @export get_label <- function(x, ..., def.value = NULL, case = NULL) { UseMethod("get_label") } #' @export get_label.data.frame <- function(x, ..., def.value = NULL, case = NULL) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) x <- .get_dot_data(x, dots) sapply(seq_along(x), function(i) { # get label label <- attr(x[[i]], "label", exact = TRUE) # any label? if (is.null(label)) { if (!is.null(def.value)) { # def.value may also apply to data frame arguments, # so it can be greater than length one if (i <= length(def.value)) label <- def.value[i] else label <- def.value } else { label <- "" } } names(label) <- colnames(x)[i] # append to return result convert_case(label, case) }) } #' @export get_label.list <- function(x, ..., def.value = NULL, case = NULL) { convert_case(unlist(lapply(x, attr, "label", exact = TRUE)), case) } #' @export get_label.default <- function(x, ..., def.value = NULL, case = NULL) { labels <- attr(x, "label", exact = TRUE) if (is.null(labels)) convert_case(def.value, case) else convert_case(labels, case) } sjlabelled/R/as_labelled.R0000644000176200001440000001212614046440014015125 0ustar liggesusers#' @title Convert vector to labelled class #' @name as_labelled #' #' @description Converts a (labelled) vector of any class into a \code{labelled} #' class vector, resp. adds a \code{labelled} class-attribute. #' #' @param x Variable (vector), \code{data.frame} or \code{list} of variables #' that should be converted to \code{\link[haven:labelled]{labelled()}}-class #' objects. #' @param add.labels Logical, if \code{TRUE}, non-labelled values will be #' labelled with the corresponding value. #' @param add.class Logical, if \code{TRUE}, \code{x} preserves its former #' \code{class}-attribute and \code{labelled} is added as additional #' attribute. If \code{FALSE} (default), all former \code{class}-attributes #' will be removed and the class-attribute of \code{x} will only #' be \code{labelled}. #' @param skip.strings Logical, if \code{TRUE}, character vector are not converted #' into labelled-vectors. Else, character vectors are converted to factors #' vector and the associated values are used as value labels. #' @param tag.na Logical, if \code{TRUE}, tagged \code{NA} values are replaced #' by their associated values. This is required, for instance, when writing #' data back to SPSS. #' #' @return \code{x}, as \code{labelled}-class object. #' #' @examples #' data(efc) #' str(efc$e42dep) #' #' x <- as_labelled(efc$e42dep) #' str(x) #' #' x <- as_labelled(efc$e42dep, add.class = TRUE) #' str(x) #' #' a <- c(1, 2, 4) #' x <- as_labelled(a, add.class = TRUE) #' str(x) #' #' data(efc) #' x <- set_labels(efc$e42dep, #' labels = c(`1` = "independent", `4` = "severe dependency")) #' x1 <- as_labelled(x, add.labels = FALSE) #' x2 <- as_labelled(x, add.labels = TRUE) #' #' str(x1) #' str(x2) #' #' get_values(x1) #' get_values(x2) #' @importFrom stats na.omit #' @export as_labelled <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { UseMethod("as_labelled") } #' @export as_labelled.data.frame <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { data_frame(lapply(x, FUN = as_labelled_helper, add.labels, add.class, skip.strings, tag.na)) } #' @export as_labelled.list <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { lapply(x, FUN = as_labelled_helper, add.labels, add.class, skip.strings, tag.na) } #' @export as_labelled.default <- function(x, add.labels = FALSE, add.class = FALSE, skip.strings = FALSE, tag.na = FALSE) { as_labelled_helper(x, add.labels, add.class, skip.strings, tag.na) } as_labelled_helper <- function(x, add.labels, add.class, skip.strings, tag.na) { # do nothing for labelled class if (is_labelled(x)) return(x) if (is.character(x) && skip.strings) return(x) # if factor, convert to numeric if (is.factor(x)) x <- as_numeric(x, keep.labels = TRUE) # return atomics if (is.null(get_labels(x, attr.only = TRUE))) return(x) # fill up missing attributes if (add.labels) x <- fill_labels(x) # reset missings if (!tag.na) { xna <- get_na(x) if (!isempty(xna)) { x <- set_na(x, na = xna) } } else { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } xna <- get_na(x, as.tag = TRUE) if (!isempty(xna)) { labels <- attr(x, "labels", exact = TRUE) new_tags <- unname(gsub("NA\\((.*)\\)", "\\1", xna)) names(new_tags) <- new_tags # convert to numeric, if character numeric_na <- which(is.na(suppressWarnings(as.numeric(new_tags)))) if (any(numeric_na)) { names(new_tags)[numeric_na] <- match(new_tags[numeric_na], letters) * -1 } tagged_missing <- haven::na_tag(x) for (i in 1:length(xna)) { x[which(tagged_missing == new_tags[i])] <- as.numeric(names(new_tags[i])) } labels[is.na(labels)] <- stats::setNames(attr(x, "na.values"), names(labels[is.na(labels)])) attr(x, "labels") <- labels } } # is type of labels same as type of vector? typically, character # vectors can have numeric labels or vice versa, numeric vectors # have "numeric" labels as character strings. in this case, # harmonize types of vector and labels, so haven doesn't complain lt <- as.vector(attr(x, "labels", exact = TRUE)) if (!is.null(lt) && typeof(lt) != typeof(x)) { lab.at <- attr(x, "labels", exact = TRUE) nlab <- names(lab.at) if (is.num.chr(lt, na.rm = TRUE)) { lab.at <- as.numeric(lab.at) names(lab.at) <- nlab } else { lab.at <- as.character(lab.at) names(lab.at) <- nlab } attr(x, "labels") <- lab.at } # get former class attributes xc <- class(x) if (add.class) class(x) <- c(xc, "haven_labelled", "vctrs_vctr") else class(x) <- c("haven_labelled", "vctrs_vctr") # add haven labelled SPSS class if (tag.na) { class(x) <- c("haven_labelled_spss", class(x)) } x } sjlabelled/R/get_values.R0000644000176200001440000000673614046276530015060 0ustar liggesusers#' @title Retrieve values of labelled variables #' @name get_values #' #' @description This function retrieves the values associated with value labels #' from \code{\link[haven]{labelled}} vectors. Data is also labelled #' when imported from SPSS, SAS or STATA via \code{\link{read_spss}}, #' \code{\link{read_sas}} or \code{\link{read_stata}}. #' #' @seealso \code{\link{get_labels}} for getting value labels and \code{\link{get_na}} #' to get values for missing values. #' #' @param x Variable (vector) with value label attributes; or a data frame or #' list with such variables. #' @param sort.val Logical, if \code{TRUE} (default), values of associated value labels #' are sorted. #' @param drop.na Logical, if \code{TRUE}, tagged NA values are excluded from #' the return value. See 'Examples' and \code{\link{get_na}}. #' #' @return The values associated with value labels from \code{x}, #' or \code{NULL} if \code{x} has no label attributes. #' #' @details \code{\link[haven]{labelled}} vectors are numeric by default (when imported with read-functions #' like \code{\link{read_spss}}) and have variable and value labels attributes. #' The value labels are associated with the values from the labelled vector. #' This function returns the values associated with the vector's value labels, #' which may differ from actual values in the vector (e.g. if not all #' values have a related label). #' #' @examples #' data(efc) #' str(efc$e42dep) #' get_values(efc$e42dep) #' get_labels(efc$e42dep) #' #' library(haven) #' x <- labelled(c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))) #' # get all values #' get_values(x) #' # drop NA #' get_values(x, drop.na = TRUE) #' #' # data frame as input #' y <- labelled(c(2:3, 3:1, tagged_na("y"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "Why" = tagged_na("y"))) #' get_values(data.frame(x, y)) #' #' @export get_values <- function(x, sort.val = TRUE, drop.na = FALSE) { UseMethod("get_values") } #' @export get_values.data.frame <- function(x, sort.val = TRUE, drop.na = FALSE) { lapply(x, FUN = get_values_helper, sort.val, drop.na) } #' @export get_values.list <- function(x, sort.val = TRUE, drop.na = FALSE) { lapply(x, FUN = get_values_helper, sort.val, drop.na) } #' @export get_values.default <- function(x, sort.val = TRUE, drop.na = FALSE) { get_values_helper(x, sort.val, drop.na) } get_values_helper <- function(x, sort.val = TRUE, drop.na = FALSE) { # get labels labels <- attr(x, "labels", exact = TRUE) # nothing found? then leave... if (is.null(labels)) return(NULL) # get values if (is.character(x) || (is.factor(x) && !is.num.fac(x))) values <- unname(labels) else values <- as.numeric(unname(labels)) if (requireNamespace("haven", quietly = TRUE)) { # do we have any tagged NAs? if (any(haven::is_tagged_na(values)) && !drop.na) { values[haven::is_tagged_na(values)] <- paste0("NA(", haven::na_tag(values[haven::is_tagged_na(values)]), ")") } } # sort values if (sort.val) values <- sort(values) # remove missing value codes? if (drop.na) values <- values[!is.na(values)] # return sorted values } sjlabelled/R/select_helpers.R0000644000176200001440000000301713556327266015720 0ustar liggesusersstring_starts_with <- function(pattern, x) { pattern <- paste0("^\\Q", pattern, "\\E") grep(pattern, x, perl = TRUE) } string_contains <- function(pattern, x) { pattern <- paste0("\\Q", pattern, "\\E") grep(pattern, x, perl = TRUE) } string_ends_with <- function(pattern, x) { pattern <- paste0("\\Q", pattern, "\\E$") grep(pattern, x, perl = TRUE) } string_one_of <- function(pattern, x) { unlist(lapply(pattern, function(.x) grep(.x, x, fixed = TRUE, useBytes = TRUE))) } rownames_as_column <- function(x, var = "rowname") { rn <- data.frame(rn = rownames(x), stringsAsFactors = FALSE) x <- cbind(rn, x) colnames(x)[1] <- var rownames(x) <- NULL x } obj_has_name <- function(x, name) { name %in% names(x) } obj_has_rownames <- function(x) { !identical(as.character(1:nrow(x)), rownames(x)) } add_cols <- function(data, ..., .after = 1, .before = NULL) { if (is.character(.after)) .after <- which(colnames(data) == .after) if (!is.null(.before) && is.character(.before)) .after <- which(colnames(data) == .before) - 1 if (!is.null(.before) && is.numeric(.before)) .after <- .before - 1 dat <- data.frame(..., stringsAsFactors = FALSE) if (.after < 1) { cbind(dat, data) } else if (is.infinite(.after)) { cbind(data, dat) } else { c1 <- 1:.after c2 <- (.after + 1):ncol(data) x1 <- data[, colnames(data)[c1], drop = FALSE] x2 <- data[, colnames(data)[c2], drop = FALSE] cbind(x1, dat, x2) } } sjlabelled/R/is_labelled.R0000644000176200001440000000061613446531213015143 0ustar liggesusers#' @title Check whether object is of class "labelled" #' @name is_labelled #' @description This function checks whether \code{x} is of class \code{labelled}. #' #' @param x An object. #' @return Logical, \code{TRUE} if \code{x} inherits from class \code{labelled}, #' \code{FALSE} otherwise. #' #' @export is_labelled <- function(x) inherits(x, c("labelled", "haven_labelled")) sjlabelled/R/remove_label.R0000644000176200001440000000200213575744600015340 0ustar liggesusers#' @title Remove variable labels from variables #' @name remove_label #' #' @description Remove variable labels from variables. #' #' @seealso \code{\link{set_label}} to manually set variable labels or #' \code{\link{get_label}} to get variable labels; \code{\link{set_labels}} to #' add value labels, replacing the existing ones (and removing non-specified #' value labels). #' #' @param x A vector or data frame. #' @inheritParams as_factor #' #' @return \code{x} with removed variable labels #' #' @examples #' data(efc) #' x <- efc[, 1:5] #' get_label(x) #' str(x) #' #' x <- remove_label(x) #' get_label(x) #' str(x) #' @export remove_label <- function(x, ...) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { attr(x[[i]], "label") <- NULL } } else { attr(x, "label") <- NULL } x } sjlabelled/R/unlabel.R0000644000176200001440000000343713675075520014342 0ustar liggesusers#' @title Convert labelled vectors into normal classes #' @name unlabel #' #' @description This function converts \code{labelled} class vectors #' into a generic data format, which means that simply all \code{labelled} #' class attributes will be removed, so all vectors / variables will most #' likely become \code{atomic}. #' #' @param x A data frame, which contains \code{labelled} class #' vectors or a single vector of class \code{labelled}. #' #' @inheritParams read_spss #' #' @return A data frame or single vector (depending on \code{x}) with common object classes. #' #' @note This function is currently only used to avoid possible compatibility issues #' with \code{\link[haven:labelled]{labelled}} class vectors. Some known issues with #' \code{labelled} class vectors have already been fixed, so #' it might be that this function will become redundant in the future. #' #' @importFrom utils txtProgressBar setTxtProgressBar #' @export unlabel <- function(x, verbose = FALSE) { # check if complete data frame or only single # vector should be converted if (is.data.frame(x)) { # create progress bar if (verbose) pb <- utils::txtProgressBar(min = 0, max = ncol(x), style = 3) else pb <- NULL # tell user... if (verbose) message("Converting labelled-classes. Please wait...\n") for (i in seq_len(ncol(x))) { # remove labelled class if (is_labelled(x[[i]])) x[[i]] <- unclass(x[[i]]) # update progress bar if (verbose) utils::setTxtProgressBar(pb, i) } if (!is.null(pb)) close(pb) # remove redundant class attributes x <- as.data.frame(x, stringsAsFactors = FALSE) } else { # remove labelled class if (is_labelled(x)) x <- unclass(x) } x } sjlabelled/R/drop_labels.R0000644000176200001440000000326414046276420015177 0ustar liggesusers#' @rdname zap_labels #' @export drop_labels <- function(x, ..., drop.na = TRUE) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- drop_labels_helper(.dat[[i]], drop.na = drop.na) } } else { x <- drop_labels_helper(.dat, drop.na = drop.na) } x } drop_labels_helper <- function(x, drop.na) { # retrieve named labels tidy.labels <- attr(x, "labels", exact = TRUE) if (requireNamespace("haven", quietly = TRUE)) { tidy.labels <- tidy.labels[!haven::is_tagged_na(tidy.labels)] } # return x, if no attribute if (is.null(tidy.labels)) return(x) # all missing in variable? if (all(is.na(x))) return(x) # remove labels with no values in data tidy.labels <- tidy.labels[get_values(x, drop.na = drop.na) %in% names(table(x))] # check if tidy labels is empty - then remove everything if (isempty(tidy.labels)) tidy.labels <- "" # check if user wants to keep labels for NA values or not. if (!drop.na) { current.na <- get_na(x) if (!is.null(current.na) && length(current.na) > 0) tidy.labels <- c(tidy.labels, current.na) } # set back labels if (isempty(tidy.labels)) { attr(x, "labels") <- NULL } else { attr(x, "labels") <- tidy.labels # if labels, e.g. due to tagged NA, are no longer of same # type as labelled vector, remove labelled class attribute - # else, haven will throw errors if (inherits(x, c("labelled", "haven_labelled")) && typeof(x) != typeof(tidy.labels)) x <- unclass(x) } x } sjlabelled/R/helpfunctions.R0000644000176200001440000000373714046276534015605 0ustar liggesusersdata_frame <- function(...) { x <- data.frame(..., stringsAsFactors = FALSE) rownames(x) <- NULL x } # do we have a stan-model? is.stan <- function(x) inherits(x, c("stanreg", "stanfit", "brmsfit")) # return names of objects passed as ellipses argument dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) is_float <- function(x) is.numeric(x) && !all(x %% 1 == 0, na.rm = TRUE) is.num.fac <- function(x) { # check if we have numeric levels !anyNA(suppressWarnings(as.numeric(levels(x)))) } .compact_list <- function(x) x[!sapply(x, function(i) length(i) == 0 || is.null(i) || any(i == "NULL"))] #' @importFrom stats na.omit is.num.chr <- function(x, na.rm = FALSE) { # check if we have numeric character values only if (na.rm) x <- stats::na.omit(x) !anyNA(suppressWarnings(as.numeric(x))) } isempty <- function(x, first.only = TRUE) { # do we have a valid vector? if (!is.null(x)) { # if it's a character, check if we have only one element in that vector if (is.character(x)) { # characters may also be of length 0 if (length(x) == 0) return(TRUE) # else, check all elements of x zero_len <- sapply(x, function(y) { # zero chars, so empty? l <- nchar(y) == 0 # if 'x' was empty, we have no chars, so zero_len will be integer(0). # check this here, because zero_len needs to be logical if (length(l) == 0) l <- TRUE l }) # return result for multiple elements of character vector if (first.only) { zero_len <- isTRUE(zero_len[1]) if (length(x) > 0) x <- x[!is.na(x)][1] } else { return(unname(zero_len)) } # we have a non-character vector here. check for length } else if (is.list(x)) { x <- .compact_list(x) zero_len <- length(x) == 0 } else { zero_len <- length(x) == 0 } } any(is.null(x) || zero_len || all(is.na(x))) } sjlabelled/R/set_label.R0000644000176200001440000001303613575744600014647 0ustar liggesusers#' @title Add variable label(s) to variables #' @name set_label #' #' @description This function adds variable labels as attribute #' (named \code{"label"}) to the variable \code{x}, resp. to a #' set of variables in a data frame or a list-object. \code{var_labels()} #' is intended for use within pipe-workflows and has a tidyverse-consistent #' syntax, including support for quasi-quotation (see 'Examples'). #' #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package} #' for more details; \code{\link{set_labels}} to manually set value labels or \code{\link{get_label}} #' to get variable labels. #' #' @param x Variable (vector), list of variables or a data frame where variables #' labels should be added as attribute. For \code{var_labels()}, \code{x} #' must be a data frame only. #' @param ... Pairs of named vectors, where the name equals the variable name, #' which should be labelled, and the value is the new variable label. #' @param label If \code{x} is a vector (single variable), use a single character string with #' the variable label for \code{x}. If \code{x} is a data frame, use a #' vector with character labels of same length as \code{ncol(x)}. #' Use \code{label = ""} to remove labels-attribute from \code{x}, resp. #' set any value of vector \code{label} to \code{""} to remove specific variable #' label attributes from a data frame's variable. #' @param value See \code{label}. #' #' @return \code{x}, with variable label attribute(s), which contains the #' variable name(s); or with removed label-attribute if #' \code{label = ""}. #' #' @examples #' # manually set value and variable labels #' dummy <- sample(1:4, 40, replace = TRUE) #' dummy <- set_labels(dummy, labels = c("very low", "low", "mid", "hi")) #' dummy <- set_label(dummy, label = "Dummy-variable") #' #' # or use: #' # set_label(dummy) <- "Dummy-variable" #' #' # auto-detection of value labels by default, auto-detection of #' # variable labels if argument "title" set to NULL. #' \dontrun{ #' library(sjPlot) #' sjp.frq(dummy, title = NULL)} #' #' # Set variable labels for data frame #' dummy <- data.frame( #' a = sample(1:4, 10, replace = TRUE), #' b = sample(1:4, 10, replace = TRUE), #' c = sample(1:4, 10, replace = TRUE) #' ) #' dummy <- set_label(dummy, c("Variable A", "Variable B", "Variable C")) #' str(dummy) #' #' # remove one variable label #' dummy <- set_label(dummy, c("Variable A", "", "Variable C")) #' str(dummy) #' #' # setting same variable labels to multiple vectors #' #' # create a set of dummy variables #' dummy1 <- sample(1:4, 40, replace = TRUE) #' dummy2 <- sample(1:4, 40, replace = TRUE) #' dummy3 <- sample(1:4, 40, replace = TRUE) #' # put them in list-object #' dummies <- list(dummy1, dummy2, dummy3) #' # and set variable labels for all three dummies #' dummies <- set_label(dummies, c("First Dummy", "2nd Dummy", "Third dummy")) #' # see result... #' get_label(dummies) #' #' #' # use 'var_labels()' to set labels within a pipe-workflow, and #' # when you need "tidyverse-consistent" api. #' # Set variable labels for data frame #' dummy <- data.frame( #' a = sample(1:4, 10, replace = TRUE), #' b = sample(1:4, 10, replace = TRUE), #' c = sample(1:4, 10, replace = TRUE) #' ) #' #' library(magrittr) #' dummy %>% #' var_labels(a = "First variable", c = "third variable") %>% #' get_label() #' #' # with quasi-quotation #' library(rlang) #' v1 <- "First variable" #' v2 <- "Third variable" #' dummy %>% #' var_labels(a = !!v1, c = !!v2) %>% #' get_label() #' #' x1 <- "a" #' x2 <- "c" #' dummy %>% #' var_labels(!!x1 := !!v1, !!x2 := !!v2) %>% #' get_label() #' #' @export set_label <- function(x, label) { # do we have all necessary arguments? if (!is.null(label) && !is.null(x)) { # if we have a data frame, we need a variable label # for each column (variable) of the data frame if (is.data.frame(x) || is.list(x)) { # get length of data frame or list, i.e. # determine number of variables if (is.data.frame(x)) nvars <- ncol(x) else nvars <- length(x) # check for matching length of supplied labels if (nvars != length(label)) { message("Argument `label` must be of same length as numbers of columns in `x`.") } else { # do we have a data frame? If yes, save column names if (is.data.frame(x)) cnames <- colnames(x) # iterate all columns / list elements for (i in seq_len(nvars)) { if (isempty(label[i])) { # empty label value means, remove # the label attribute attr(x[[i]], "label") <- NULL } else { # set variable label attr(x[[i]], "label") <- label[i] # set names attribute. equals variable name if (is.data.frame(x)) names(attr(x[[i]], "label")) <- cnames[i] } } } } else { if (isempty(label)) # empty label, so remove label attribute attr(x, "label") <- NULL else # set label attribute attr(x, "label") <- label } } x } #' @rdname set_label #' @export `set_label<-` <- function(x, value) { UseMethod("set_label<-") } #' @export `set_label<-.default` <- function(x, value) { x <- set_label(x, value) x } sjlabelled/R/label_to_colnames.R0000644000176200001440000000217314046415064016350 0ustar liggesusers#' @title Use variable labels as column names #' @name label_to_colnames #' #' @description This function sets variable labels as column names, to use "labelled #' data" also for those functions that cannot cope with labelled data by default. #' #' @param x A data frame. #' @inheritParams as_factor #' #' @return \code{x} with variable labels as column names. For variables without #' variable labels, the column name is left unchanged. #' #' @examples #' data(iris) #' #' iris <- var_labels( #' iris, #' Petal.Length = "Petal length (cm)", #' Petal.Width = "Petal width (cm)" #' ) #' #' colnames(iris) #' plot(iris) #' #' colnames(label_to_colnames(iris)) #' plot(label_to_colnames(iris)) #' @export label_to_colnames <- function(x, ...) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (!is.null(ncol(.dat)) && ncol(.dat) > 0) { replace_index <- match(colnames(.dat), colnames(x)) colnames(x)[replace_index] <- get_label(.dat, def.value = colnames(.dat)) } else { colnames(x) <- get_label(x, def.value = colnames(x)) } x }sjlabelled/R/get_model_labels.R0000644000176200001440000002160214046440416016164 0ustar liggesusers#' @title Retrieve labels of model terms from regression models #' @name term_labels #' #' @description This function retrieves variable labels from model terms. In case #' of categorical variables, where one variable has multiple dummies, #' variable name and category value is returned. #' #' @param models One or more fitted regression models. May also be glm's or #' mixed models. #' @param mark.cat Logical, if \code{TRUE}, the returned vector has an #' attribute with logical values, which indicate whether a label indicates #' the value from a factor category (attribute value is \code{TRUE}) or #' a term's variable labels (attribute value is \code{FALSE}). #' @param case Desired target case. Labels will automatically converted into the #' specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for #' more details on this argument. #' @param prefix Indicates whether the value labels of categorical variables #' should be prefixed, e.g. with the variable name or variable label. #' May be abbreviated. See 'Examples', #' @param mv,multi.resp Logical, if \code{TRUE} and \code{models} is a multivariate #' response model from a \code{brmsfit} object, then the labels for each #' dependent variable (multiple responses) are returned. #' @param ... Further arguments passed down to \code{to_any_case()}, #' like \code{preprocess} or \code{postprocess}. #' #' @return For \code{term_labels()}, a (named) character vector with #' variable labels of all model terms, which can be used, for instance, #' as axis labels to annotate plots. \cr \cr For \code{response_labels()}, #' a character vector with variable labels from all dependent variables #' of \code{models}. #' #' @details Typically, the variable labels from model terms are returned. However, #' for categorical terms that have estimates for each category, the #' value labels are returned as well. As the return value is a named #' vector, you can easily use it with \pkg{ggplot2}'s \code{scale_*()} #' functions to annotate plots. #' #' @examples #' # use data set with labelled data #' data(efc) #' #' fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) #' term_labels(fit) #' #' # make "education" categorical #' if (require("sjmisc")) { #' efc$c172code <- to_factor(efc$c172code) #' fit <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) #' term_labels(fit) #' #' # prefix value of categorical variables with variable name #' term_labels(fit, prefix = "varname") #' #' # prefix value of categorical variables with value label #' term_labels(fit, prefix = "label") #' #' # get label of dv #' response_labels(fit) #' } #' @importFrom insight find_parameters get_data #' @importFrom stats model.frame coef terms #' @export term_labels <- function(models, mark.cat = FALSE, case = NULL, prefix = c("none", "varname", "label"), ...) { prefix <- match.arg(prefix) # to be generic, make sure argument is a list if (!inherits(models, "list")) models <- list(models) # get model terms and model frame m <- try(lapply(models, function(.x) insight::find_predictors(.x, flatten = TRUE)), silent = TRUE) mf <- try(lapply(models, function(.x) insight::get_data(.x)[, -1, drop = FALSE]), silent = TRUE) # return NULL on error if (inherits(m, "try-error") || inherits(mf, "try-error")) { return(NULL) } # get all variable labels for predictors lbs1 <- unlist(lapply(1:length(m), function(x) { if (is.null(mf[[x]])) { m[[x]][-1] } else { get_label(mf[[x]], def.value = colnames(mf[[x]])) } })) # any empty name? if yes, use label as name empty <- nchar(names(lbs1)) if (any(empty == 0)) { empty <- which(empty == 0) names(lbs1)[empty] <- lbs1[empty] } # for categorical predictors, we have one term per # value (factor level), so extract these as well lbs2 <- lapply(mf, function(.x) { unlist(mapply(function(.x, .y) { if (is.factor(.x)) { l <- get_labels(.x) if (!anyNA(suppressWarnings(as.numeric(l)))) paste0(.y, l) else l } }, .x, colnames(.x), SIMPLIFY = FALSE)) }) fixed.names <- lapply(mf, function(.x) { unlist(mapply(function(.x, .y) { if (is.factor(.x)) paste0(.y, levels(.x)) }, .x, colnames(.x), SIMPLIFY = FALSE)) }) # flatten, if we have any elements. in case all predictors # were non-factors, list has only NULLs lbs2 <- if (!is.null(unlist(lbs2))) as.character(unlist(lbs2)) else NULL fixed.names <- if (!is.null(unlist(fixed.names))) as.character(unlist(fixed.names)) else NULL names(lbs2) <- unname(fixed.names) # create logical to indicate which labels come from factors fl1 <- vector(mode = "logical", length = length(lbs1)) if (!is.null(lbs2)) { fl2 <- vector(mode = "logical", length = length(lbs2)) fl2[1:length(fl2)] <- TRUE } else { fl2 <- NULL } # remove duplicated lbs <- c(lbs1, lbs2) fl <- c(fl1, fl2) keep <- !(duplicated(lbs) & duplicated(names(lbs))) lbs <- lbs[keep] fl <- fl[keep] # set default names for values if (is.null(names(lbs))) names(lbs) <- lbs # do we have partial empty names? if yes, fill them en <- which(nchar(names(lbs)) == 0) if (!isempty(en)) names(lbs)[en] <- lbs[en] # prefix labels if (prefix != "none") lbs <- prepare.labels(lbs, catval = fl, style = prefix) # the vector now contains all possible labels, as named vector. # since ggplot uses named vectors as labels for axis-scales, matching # of labels is done automatically lbs <- convert_case(lbs, case, ...) # check if attribute is requested if (mark.cat) attr(lbs, "category.value") <- fl lbs } #' @rdname term_labels #' @export get_term_labels <- term_labels prepare.labels <- function(x, catval, style = c("varname", "label")) { x_var <- names(x[!catval]) x_val <- names(x[catval]) for (i in x_var) { pos <- string_starts_with(pattern = i, x = x_val) if (!isempty(pos) && length(pos) > 0) { match.vals <- x_val[pos] if (style == "label") x[match.vals] <- sprintf("%s: %s", x[i], x[match.vals]) else x[match.vals] <- sprintf("%s: %s", i, x[match.vals]) } } x } #' @rdname term_labels #' @importFrom stats model.frame #' @export response_labels <- function(models, case = NULL, multi.resp = FALSE, mv = FALSE, ...) { if (!missing(multi.resp)) mv <- multi.resp # to be generic, make sure argument is a list if (!inherits(models, "list")) models <- list(models) intercepts.names <- tryCatch({ lapply(models, function(x) { if (inherits(x, "brmsfit")) { if (is.null(stats::formula(x)$formula) && !is.null(stats::formula(x)$responses)) if (mv) stats::formula(x)$responses else paste(stats::formula(x)$responses, collapse = ", ") else deparse(stats::formula(x)$formula[[2L]]) } else if (inherits(x, "stanmvreg")) { if (mv) sapply(stats::formula(x), function(.x) deparse(.x[[2L]], width.cutoff = 500), simplify = TRUE) else paste(sapply(stats::formula(x), function(.x) deparse(.x[[2L]], width.cutoff = 500), simplify = TRUE), collapse = ", ") } else { deparse(stats::formula(x)[[2L]]) } })}, error = function(x) { NULL }, warning = function(x) { NULL } ) mf <- tryCatch({ mapply( function(x, y) { m <- insight::get_data(x) if (mv && inherits(x, "brmsfit")) colnames(m) <- gsub(pattern = "_", replacement = "", x = colnames(m), fixed = TRUE) y <- y[obj_has_name(m, y)] if (length(y) > 0) { m[, y, drop = FALSE] } else { m[[1]] } }, models, intercepts.names, SIMPLIFY = FALSE )}, error = function(x) { NULL }, warning = function(x) { NULL } ) if (is.null(intercepts.names) || is.null(mf)) { return(rep_len("Dependent variable", length.out = length(models))) } # get all labels lbs <- mapply(function(.x, .y) get_label(.x, def.value = .y), mf, intercepts.names, SIMPLIFY = FALSE) # flatten list, and check for correct elements lbs <- as.character(unlist(lbs)) # There are some formulas that return a rather cryptic # name. In such cases, the variable name might have more # than 1 element, and here we need to set a proper default if (!mv && length(lbs) > length(models)) lbs <- "Dependent variable" convert_case(lbs, case, ...) } #' @rdname term_labels #' @export get_dv_labels <- response_labelssjlabelled/R/as_factor.R0000644000176200001440000001152514046456362014656 0ustar liggesusers#' @title Convert variable into factor and keep value labels #' @name as_factor #' #' @description This function converts a variable into a factor, but preserves #' variable and value label attributes. #' #' @param x A vector or data frame. #' @param ... Optional, unquoted names of variables that should be selected for #' further processing. Required, if \code{x} is a data frame (and no #' vector) and only selected variables from \code{x} should be processed. #' You may also use functions like \code{:} or tidyselect's select-helpers. #' See 'Examples'. #' @param add.non.labelled Logical, if \code{TRUE}, non-labelled values also #' get value labels. #' #' @return A factor, including variable and value labels. If \code{x} #' is a data frame, the complete data frame \code{x} will be returned, #' where variables specified in \code{...} are coerced #' to factors (including variable and value labels); #' if \code{...} is not specified, applies to all variables in the #' data frame. #' #' @note This function is intended for use with vectors that have value and variable #' label attributes. Unlike \code{\link{as.factor}}, \code{as_factor} converts #' a variable into a factor and preserves the value and variable label attributes. #' \cr \cr #' Adding label attributes is automatically done by importing data sets #' with one of the \code{read_*}-functions, like \code{\link{read_spss}}. #' Else, value and variable labels can be manually added to vectors #' with \code{\link{set_labels}} and \code{\link{set_label}}. #' #' @details \code{as_factor} converts numeric values into a factor with numeric #' levels. \code{\link{as_label}}, however, converts a vector into #' a factor and uses value labels as factor levels. #' #' @examples #' if (require("sjmisc") && require("magrittr")) { #' data(efc) #' # normal factor conversion, loses value attributes #' x <- as.factor(efc$e42dep) #' frq(x) #' #' # factor conversion, which keeps value attributes #' x <- as_factor(efc$e42dep) #' frq(x) #' #' # create partially labelled vector #' x <- set_labels( #' efc$e42dep, #' labels = c( #' `1` = "independent", #' `4` = "severe dependency", #' `9` = "missing value" #' )) #' #' # only copy existing value labels #' as_factor(x) %>% head() #' get_labels(as_factor(x), values = "p") #' #' # also add labels to non-labelled values #' as_factor(x, add.non.labelled = TRUE) %>% head() #' get_labels(as_factor(x, add.non.labelled = TRUE), values = "p") #' #' #' # easily coerce specific variables in a data frame to factor #' # and keep other variables, with their class preserved #' as_factor(efc, e42dep, e16sex, c172code) %>% head() #' #' # use select-helpers from dplyr-package #' if (require("dplyr")) { #' as_factor(efc, contains("cop"), c161sex:c175empl) %>% head() #' } #' } #' @export as_factor <- function(x, ...) { UseMethod("as_factor") } #' @rdname as_factor #' @export to_factor <- as_factor #' @export as_factor.default <- function(x, add.non.labelled = FALSE, ...) { to_fac_helper(x, add.non.labelled) } #' @rdname as_factor #' @export as_factor.data.frame <- function(x, ..., add.non.labelled = FALSE) { dots <- sapply(eval(substitute(alist(...))), deparse) .dat <- .get_dot_data(x, dots) for (i in colnames(.dat)) { x[[i]] <- to_fac_helper(.dat[[i]], add.non.labelled) } x } to_fac_helper <- function(x, add.non.labelled) { # is already factor? if (is.factor(x)) return(x) # retrieve value labels lab <- get_labels( x, attr.only = TRUE, values = "n", non.labelled = add.non.labelled ) # retrieve variable labels varlab <- attr(x, "label", exact = TRUE) na_values <- attr(x, "na_values", exact = TRUE) if (is.null(na_values)) { na_values <- attr(x, "na.values", exact = TRUE) } # switch value and names attribute, since get_labels # returns the values as names, and the value labels # as "vector content" if (!is.null(lab)) { if (is.character(x) || (is.factor(x) && !is.num.fac(x))) lab.switch <- names(lab) else lab.switch <- as.numeric(names(lab)) names(lab.switch) <- as.character(lab) } else { lab.switch <- NULL } # convert variable to factor x <- factor(x, exclude = c(NA_character_, "NaN")) # set back value labels x <- suppressMessages( set_labels( x, labels = lab.switch, force.labels = TRUE, force.values = FALSE ) ) # set back variable labels attr(x, "label") <- varlab attr(x, "na_values") <- na_values attr(x, "na.values") <- na_values x } sjlabelled/R/as_label.R0000644000176200001440000002276314046276441014463 0ustar liggesusers#' @title Convert variable into factor with associated value labels #' @name as_label #' #' @description \code{as_label()} converts (replaces) values of a variable (also of factors #' or character vectors) with their associated value labels. Might #' be helpful for factor variables. #' For instance, if you have a Gender variable with 0/1 value, and associated #' labels are male/female, this function would convert all 0 to male and #' all 1 to female and returns the new variable as factor. #' \code{as_character()} does the same as \code{as_label()}, but returns #' a character vector. #' #' @param add.non.labelled Logical, if \code{TRUE}, values without associated #' value label will also be converted to labels (as is). See 'Examples'. #' @param prefix Logical, if \code{TRUE}, the value labels used as factor levels #' or character values will be prefixed with their associated values. See 'Examples'. #' @param var.label Optional string, to set variable label attribute for the #' returned variable (see vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}). #' If \code{NULL} (default), variable label attribute of \code{x} will #' be used (if present). If empty, variable label attributes will be removed. #' @param drop.na Logical, if \code{TRUE}, tagged \code{NA} values with value labels #' will be converted to regular NA's. Else, tagged \code{NA} values will be replaced #' with their value labels. See 'Examples' and \code{\link{get_na}}. #' @param drop.levels Logical, if \code{TRUE}, unused factor levels will be #' dropped (i.e. \code{\link{droplevels}} will be applied before returning #' the result). #' @param keep.labels Logical, if \code{TRUE}, value labels are preserved This #' allows users to quickly convert back factors to numeric vectors with #' \code{as_numeric()}. #' #' @inheritParams add_labels #' #' @return A factor with the associated value labels as factor levels. If \code{x} #' is a data frame, the complete data frame \code{x} will be returned, #' where variables specified in \code{...} are coerced to factors; #' if \code{...} is not specified, applies to all variables in the #' data frame. \code{as_character()} returns a character vector. #' #' @note Value label attributes (see \code{\link{get_labels}}) #' will be removed when converting variables to factors. #' #' @details See 'Details' in \code{\link{get_na}}. #' #' @examples #' data(efc) #' print(get_labels(efc)['c161sex']) #' head(efc$c161sex) #' head(as_label(efc$c161sex)) #' #' print(get_labels(efc)['e42dep']) #' table(efc$e42dep) #' table(as_label(efc$e42dep)) #' #' head(efc$e42dep) #' head(as_label(efc$e42dep)) #' #' # structure of numeric values won't be changed #' # by this function, it only applies to labelled vectors #' # (typically categorical or factor variables) #' #' str(efc$e17age) #' str(as_label(efc$e17age)) #' #' #' # factor with non-numeric levels #' as_label(factor(c("a", "b", "c"))) #' #' # factor with non-numeric levels, prefixed #' x <- factor(c("a", "b", "c")) #' x <- set_labels(x, labels = c("ape", "bear", "cat")) #' as_label(x, prefix = TRUE) #' #' #' # create vector #' x <- c(1, 2, 3, 2, 4, NA) #' # add less labels than values #' x <- set_labels( #' x, #' labels = c("yes", "maybe", "no"), #' force.labels = FALSE, #' force.values = FALSE #' ) #' #' # convert to label w/o non-labelled values #' as_label(x) #' #' # convert to label, including non-labelled values #' as_label(x, add.non.labelled = TRUE) #' #' #' # create labelled integer, with missing flag #' if (require("haven")) { #' x <- labelled( #' c(1:3, tagged_na("a", "c", "z"), 4:1, 2:3), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) #' ) #' #' # to labelled factor, with missing labels #' as_label(x, drop.na = FALSE) #' #' # to labelled factor, missings removed #' as_label(x, drop.na = TRUE) #' #' # keep missings, and use non-labelled values as well #' as_label(x, add.non.labelled = TRUE, drop.na = FALSE) #' } #' #' # convert labelled character to factor #' dummy <- c("M", "F", "F", "X") #' dummy <- set_labels( #' dummy, #' labels = c(`M` = "Male", `F` = "Female", `X` = "Refused") #' ) #' get_labels(dummy,, "p") #' as_label(dummy) #' #' # drop unused factor levels, but preserve variable label #' x <- factor(c("a", "b", "c"), levels = c("a", "b", "c", "d")) #' x <- set_labels(x, labels = c("ape", "bear", "cat")) #' set_label(x) <- "A factor!" #' x #' as_label(x, drop.levels = TRUE) #' #' # change variable label #' as_label(x, var.label = "New variable label!", drop.levels = TRUE) #' #' #' # convert to numeric and back again, preserving label attributes #' # *and* values in numeric vector #' x <- c(0, 1, 0, 4) #' x <- set_labels(x, labels = c(`null` = 0, `one` = 1, `four` = 4)) #' #' # to factor #' as_label(x) #' #' # to factor, back to numeric - values are 1, 2 and 3, #' # instead of original 0, 1 and 4 #' as_numeric(as_label(x)) #' #' # preserve label-attributes when converting to factor, use these attributes #' # to restore original numeric values when converting back to numeric #' as_numeric(as_label(x, keep.labels = TRUE), use.labels = TRUE) #' #' #' # easily coerce specific variables in a data frame to factor #' # and keep other variables, with their class preserved #' as_label(efc, e42dep, e16sex, c172code) #' @export as_label <- function(x, ...) { UseMethod("as_label") } #' @rdname as_label #' @export to_label <- as_label #' @export as_label.default <- function(x, add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, keep.labels = FALSE, ...) { as_label_helper(x, add.non.labelled, prefix, var.label, drop.na, drop.levels, keep.labels) } #' @rdname as_label #' @export as_label.data.frame <- function(x, ..., add.non.labelled = FALSE, prefix = FALSE, var.label = NULL, drop.na = TRUE, drop.levels = FALSE, keep.labels = FALSE) { dots <- sapply(eval(substitute(alist(...))), deparse) .dat <- .get_dot_data(x, dots) # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- as_label_helper(.dat[[i]], add.non.labelled, prefix, var.label, drop.na, drop.levels, keep.labels) } x } as_label_helper <- function(x, add.non.labelled, prefix, var.label, drop.na, drop.levels, keep.labels) { # prefix labels? if (prefix) iv <- "p" else iv <- 0 # retrieve variable label if (is.null(var.label)) var_lab <- get_label(x) else var_lab <- var.label # get labels labels <- NULL # keep missings? if (!drop.na) { ## TODO enable replacing tagged NA again # # get NA # current.na <- get_na(x) # # # any NA? # if (!is.null(current.na)) { # if (!requireNamespace("haven", quietly = TRUE)) { # stop("Package 'haven' required for this function. Please install it.") # } # # we have to set all NA labels at once, else NA loses tag # # so we prepare a dummy label-vector, where we copy all different # # NA labels to `x` afterwards # dummy_na <- rep("", times = length(x)) # # # iterare NA # for (i in seq_len(length(current.na))) { # dummy_na[haven::na_tag(x) == haven::na_tag(current.na[i])] <- names(current.na)[i] # } # # x[haven::is_tagged_na(x)] <- dummy_na[haven::is_tagged_na(x)] # } # } else { # in case x has tagged NA's we need to be sure to convert # those into regular NA's, because else saving would not work x[is.na(x)] <- NA } # get value labels vl <- get_labels(x, attr.only = TRUE, values = iv, non.labelled = add.non.labelled, drop.na = drop.na) # check if we have any labels, else # return variable "as is" if (!is.null(vl)) { # get associated values for value labels vnn <- labels <- get_labels( x, attr.only = TRUE, values = "n", non.labelled = add.non.labelled, drop.na = drop.na ) # convert to numeric vn <- suppressWarnings(as.numeric(names(vnn))) # where some values non-numeric? if yes, # use value names as character values if (anyNA(vn)) vn <- names(vnn) # replace values with labels if (is.factor(x)) { # more levels than labels? remain_labels <- levels(x)[!levels(x) %in% vn] # set new levels levels(x) <- c(vl, remain_labels) # remove attributes x <- remove_all_labels(x) } else { x <- as.character(x) for (i in seq_len(length(vl))) { #if label is number, prevents loop from replacing again x[x == vn[i]] <- paste0(vl[i], "_X_") } # remove suffix x <- gsub("_X_$", "", x) # to factor x <- factor(x, levels = unique(vl)) } } # drop unused levels? if (drop.levels && is.factor(x)) x <- droplevels(x) # set back variable labels if (!is.null(var_lab)) x <- suppressWarnings(set_label(x, label = var_lab)) # check if we should set back former variable and value labels if (keep.labels && !prefix) { labels.names <- names(labels) labels.values <- unname(labels) labels <- labels.names names(labels) <- labels.values x <- set_labels(x, labels = labels, force.labels = TRUE) } # return as factor x } sjlabelled/R/remove_all_labels.R0000644000176200001440000000326014046456500016353 0ustar liggesusers#' @title Remove value and variable labels from vector or data frame #' @name remove_all_labels #' #' @description This function removes value and variable label attributes #' from a vector or data frame. These attributes are typically #' added to variables when importing foreign data (see #' \code{\link{read_spss}}) or manually adding label attributes #' with \code{\link{set_labels}}. #' #' @seealso See vignette \href{../doc/intro_sjlabelled.html}{Labelled Data and the sjlabelled-Package}, #' and \code{\link{copy_labels}} for adding label attributes #' (subsetted) data frames. #' #' @param x Vector or \code{data.frame} with variable and/or value label attributes #' @return \code{x} with removed value and variable label attributes. #' #' @examples #' data(efc) #' str(efc) #' str(remove_all_labels(efc)) #' @export remove_all_labels <- function(x) { UseMethod("remove_all_labels") } #' @export remove_all_labels.data.frame <- function(x) { as.data.frame(lapply(x, FUN = remove_all_labels_helper), stringsAsFactors = FALSE) } #' @export remove_all_labels.list <- function(x) { lapply(x, FUN = remove_all_labels_helper) } #' @export remove_all_labels.default <- function(x) { remove_all_labels_helper(x) } remove_all_labels_helper <- function(x) { # remove attributes attr(x, "label") <- NULL attr(x, "labels") <- NULL attr(x, "na_values") <- NULL attr(x, "na.values") <- NULL # unclass, if labelled. labelled class may throw # errors / warnings, when not havin label attributes if (is_labelled(x)) x <- unclass(x) # return var x } sjlabelled/R/convert_case.R0000644000176200001440000000422314046276665015373 0ustar liggesusers#' @title Generic case conversion for labels #' @name convert_case #' #' @description This function wraps \code{to_any_case()} from the \pkg{snakecase} #' package with certain defaults for the \code{sep_in} and #' \code{sep_out} arguments, used for instance to convert cases in #' \code{\link{term_labels}}. #' #' @param lab Character vector that should be case converted. #' @param case Desired target case. Labels will automatically converted into the #' specified character case. See \code{\link[snakecase:to_any_case]{to_any_case()}} for #' more details on this argument. #' @param verbose Toggle warnings and messages on or off. #' @param ... Further arguments passed down to \code{to_any_case()}, #' like \code{sep_in} or \code{sep_out}. #' #' @return \code{lab}, with converted case. #' #' @details When calling \code{to_any_case()} from \pkg{snakecase}, the #' \code{sep_in} argument is set to \code{"(?% #' select(c172code, e42dep) %>% #' set_labels( #' e42dep, #' labels = c("independent" = 1, "severe dependency" = 4) #' ) %>% #' zap_labels() #' #' #' # drop_labels() ---- #' #' rp <- rec_pattern(1, 100) #' rp #' #' # sample data #' data(efc) #' # recode carers age into groups of width 5 #' x <- rec(efc$c160age, rec = rp$pattern) #' # add value labels to new vector #' x <- set_labels(x, labels = rp$labels) #' #' # watch result. due to recode-pattern, we have age groups with #' # no observations (zero-counts) #' frq(x) #' # now, let's drop zero's #' frq(drop_labels(x)) #' #' # drop labels, also drop NA value labels, then also zap tagged NA #' if (require("haven")) { #' x <- labelled(c(1:3, tagged_na("z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "Unused" = 5, #' "Not home" = tagged_na("z"))) #' x #' drop_labels(x, drop.na = FALSE) #' drop_labels(x) #' zap_na_tags(drop_labels(x)) #' #' # fill_labels() ---- #' #' # create labelled integer, with tagged missings #' x <- labelled( #' c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) #' ) #' # get current values and labels #' x #' get_labels(x) #' #' fill_labels(x) #' get_labels(fill_labels(x)) #' # same as #' get_labels(x, non.labelled = TRUE) #' } #' } #' @importFrom stats na.omit #' @export zap_labels <- function(x, ...) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- zap_labels_helper(.dat[[i]]) } } else { x <- zap_labels_helper(.dat) } x } #' @rdname zap_labels #' @importFrom stats na.omit #' @export zap_unlabelled <- function(x, ...) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- zap_unlabelled_helper(.dat[[i]]) } } else { x <- zap_unlabelled_helper(.dat) } x } #' @title Convert tagged NA values into regular NA #' @name zap_na_tags #' #' @description Replaces all \code{\link[haven:tagged_na]{tagged_na()}} values with #' regular \code{NA}. #' #' @param x A \code{\link[haven:labelled]{labelled()}} vector with \code{tagged_na} #' values, or a data frame with such vectors. #' #' @inheritParams add_labels #' #' @return \code{x}, where all \code{tagged_na} values are converted to \code{NA}. #' #' @examples #' if (require("haven")) { #' x <- labelled( #' c(1:3, tagged_na("a", "c", "z"), 4:1), #' c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), #' "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) #' ) #' # get current NA values #' x #' get_na(x) #' zap_na_tags(x) #' get_na(zap_na_tags(x)) #' #' # also works with non-labelled vector that have tagged NA values #' x <- c(1:5, tagged_na("a"), tagged_na("z"), NA) #' haven::print_tagged_na(x) #' haven::print_tagged_na(zap_na_tags(x)) #' } #' @importFrom stats na.omit #' @export zap_na_tags <- function(x, ...) { dots <- as.character(match.call(expand.dots = FALSE)$`...`) .dat <- .get_dot_data(x, dots) if (is.data.frame(x)) { # iterate variables of data frame for (i in colnames(.dat)) { x[[i]] <- zap_na_tags_helper(.dat[[i]]) } } else { x <- zap_na_tags_helper(.dat) } x } zap_labels_helper <- function(x) { x <- set_na(x, na = get_values(x, drop.na = TRUE)) # remove label attributes attr(x, "label") <- NULL if (is_labelled(x)) class(x) <- NULL x } zap_unlabelled_helper <- function(x) { vals <- get_values(x) x <- set_na(x, na = stats::na.omit(unique(x)[!unique(x) %in% vals])) if (is_labelled(x)) class(x) <- NULL x } zap_na_tags_helper <- function(x) { if (!requireNamespace("haven", quietly = TRUE)) { stop("Package 'haven' required for this function. Please install it.") } # check if values has only NA's if (sum(is.na(x)) == length(x)) return(x) # convert all NA, including tagged NA, into regular NA x[is.na(x)] <- NA # get labels, w/o labelled NA # retrieve named labels labs <- attr(x, "labels", exact = TRUE) labs <- labs[!haven::is_tagged_na(labs)] attr(x, "na_values") <- NULL attr(x, "na.values") <- NULL # if no labels left, clear attribute if (is.null(labs)) { attr(x, "labels") <- NULL return(x) } else { set_labels(x, labels = labs) } } sjlabelled/NEWS.md0000644000176200001440000002133114046417071013455 0ustar liggesusers# sjlabelled 1.1.8 ## Changes to functions * `label_to_colnames()` gets a `...` argument, to change only column names of selected variables. ## Bug fixes * Fixed issues with reading/writing tagged `NA` values. * Fixed issues with value labelling of character vectors. # sjlabelled 1.1.7 ## General * `write_spss()` gets a `compress` argument, to save data as compressed SPSS file (zsav-format). * The select-helper `num_range()` is now also supported. * Give proper warning when `set_labels()` was used on a vector with completely missing values. # sjlabelled 1.1.6 ## General * Fix issue in `write_*()` functions that did not work since the last *haven* update. # sjlabelled 1.1.5 ## General * Fix CRAN check issues. # sjlabelled 1.1.4 ## General * Reduce package dependencies. ## Bug fixes * Fixed issues in `remove_labels()` when factor levels were strings. * Fixed Namespace issues in vignettes. # sjlabelled 1.1.3 ## Bug fixes * Fix warning from CRAN checks. # sjlabelled 1.1.2 ## General * Reduce package dependencies. * `get_dv_labels()` was renamed to `response_labels()`. `get_dv_labels()` will remain as alias. * `get_term_labels()` was renamed to `term_labels()`. `get_term_labels()` will remain as alias. ## New functions * `label_to_colnames()` as a convenient shortcut to set variable labels as column names. ## Changes to functions * `copy_labels` gets a `...`-argument to copy only specific variables values. * The `read_*()` functions are now less verbose by default. ## Bug fixes * Fixed issue in `set_labels()` for character vectors with numeric char-values that are larger than `9`. # sjlabelled 1.1.1 ## Changes to functions * The `read_*()`-functions get a `drop.labels`-argument, which - if `TRUE` - automatically calls `drop_labels()` on the imported dataset. * `read_data()` is a shortcut that calls one of the `read_*()`-functions, depending on the file extension. ## Bug fixes * Fix issue in `as_label()` to prevent replacement recycling when labels were numeric. * Fix issues with saving character vectors to SPSS files. # sjlabelled 1.1.0 ## New functions * `remove_label()`, to remove variable labels (and preserve value labels). ## Changes to functions `convert_case()` gets a `verbose`-argument to toggle warnings and messages on or off. # sjlabelled 1.0.17 ## General * Reduce package dependencies. * New package-vignette on quasiquotation. ## New functions * Re-implement `set_na()`, to define (labelled) `NA`-values in a vector. ## Changes to functions * `as_label()` gets a `keep.labels`-argument. With this, users can easily convert vector to factors and vice versa, preserving label-attributes. ## Bug fixes * Fixed bug with argument `use.labels` in `as_numeric()`. # sjlabelled 1.0.16 ## General * Started adding test-units. * Minor code revisions to avoid errors during CRAN check for current devel-versions of R. ## New functions * `val_labels()` as counterpart to `var_labels()`, to set value labels, with support for quasi-quotation (see Examples). ## Changes to functions * `var_labels()` now supports quasi-quotation (see Examples). # sjlabelled 1.0.15 ## General * Update code to the new class-attribute `haven_labelled` from the **haven**-package. ## Bug fixes * Fix issue in `get_term_labels()` that returned wrong object names for factors where factor levels did start with "1". # sjlabelled 1.0.14 ## General * Reduce package dependencies. ## Bug fixes * Fix bug in `var_labels()`, where non-existing columns may lead to wrong labelling. # sjlabelled 1.0.13 ## General * Removed defuncted functions. ## Changes to functions * `copy_labels()` now also copy labels even if columns in subsetted and original data frame do not completely match. * Arguments `include.non.labelled` and `include.values` in `get_labels()` are renamed to shorter versions `non.labelled` and `values`. `include.non.labelled` and `include.values` will become softly deprecated. * The `read_*()`-functions get a `verbose`-argument, to show or hide the progressbar when imported datasets are converted. ## Bug fixes * Due to changes in the _broom_ and _lmerTest_ packages, tidiers did no longer work for `lmerModLmerTest` objects. # sjlabelled 1.0.12 ## General * `get_dv_labels()` and `get_term_labels()` now support _clmm_-objects (package **ordinal**) and _stanmvreg_-objects (package **rstanarm**). * `read_spss()` gets a `enc`-argument for character encoding, which is now supported since haven 1.1.2. * `get_term_labels()` now returns `NULL` for unsupported models, instead of giving an error. * `get_dv_labels()` now returns a default string for unsupported models, instead of giving an error. # sjlabelled 1.0.11 ## General * `as_labelled()` now corrects inconsistent types between labels and variable values. ## Changes to functions * `get_dv_labels()` gets a `multi.resp`-argument to return each label of a multivariate response model (only for _brmsfit_ objects). * `get_label()` now also returns name-attribute for empty labels if `x` was a data.frame. ## Bug fixes * `write_*()`-functions should now properly set labels for negative values. # sjlabelled 1.0.9 ## General * Deprecated `set_note()` and `get_note()`, because there is already an R base function for this purpose: `comment()`. * Improved performance of functions, at the cost of removing support for the _foreign_ package. _sjlabelled_ now only supports labelled data from package _haven_. ## Changes to functions * `get_term_labels()` gets a `prefix`-argument to prefix the returned labels of categorical variable either with the related variable name or label. ## Bug fixes * Fix issues with retrieving incorrect labels from `get_term_labels()` for models that used unlabelled data in combination with other contrasts than the default option. * `get_dv_labels()` no longer returns `"NULL"` for multivariate-response-models fitted with _brms_. # sjlabelled 1.0.8 ## General * Removed `lbl_df()`, because printing tibbles now depends on pkg _pillar_ and was revised substantially, so maintainace of `lbl_df()` is too extensive. # sjlabelled 1.0.7 ## General * Cross references from `dplyr::select_helpers` were updated to `tidyselect::select_helpers`. * Replace deprecated arguments in `convert_case()` from call to package *snakecase* # sjlabelled 1.0.6 ## Changes to functions * `get_dv_labels()` and `get_term_labels()` now support `clm`-objects from package *ordinal*, `polr`-objects from package *MASS* and `Zelig-relogit`-objects from package *Zelig*. * `get_dv_labels()` and `get_term_labels()` get a `...`-argument to pass down further arguments to `snakecase::to_any_case()`. * `convert_case()` is now exported, for usage in other packages as well. * Remove `protect`-argument from internal case conversion (affects `get_term_labels()` and `get_dv_labels()`), in preparation for forthcoming *snakecase*-package update. # sjlabelled 1.0.5 ## General * Remove unnecessary imports. * Revised `lbl_df()` due to changes in the internals of `tibble::trunc_mat()`. ## New functions * `as_factor()` to convert labelled vectors into factors, preserving labels. ## Changes to functions * `get_dv_labels()` now supports `brmsfit`-objects from package `brms`. # sjlabelled 1.0.4 ## Changes to functions * `get_term_labels()` now includes variable names for factors with numeric factor levels only (and not only return the numeric level as term label). ## Bug fixes * Fixed bug for `as_label()`, when `x` was a character vector and argument `drop.levels` was `TRUE`. * Fixed issue for *lme* and *gls* objects in `get_term_labels()` and `get_dv_labels()`. # sjlabelled 1.0.3 ## General * Changed package imports, so `sjlabelled` no longer requires R version 3.3.3 or higher. # sjlabelled 1.0.2 ## General * Minor fix to avoid warning when using `as_numeric()`. ## Changes to functions * `get_label()`, `get_term_labels()` and `get_dv_labels()` get a `case`-argument, to convert labels into any case, using the [snakecase](https://cran.r-project.org/package=snakecase)-package. # sjlabelled 1.0.1 ## General * Removed function 'var_rename()', which is in pkg 'sjmisc'. ## New functions * `get_term_labels()` and `get_dv_labels()` to retrieve term labels from regression models. ## Changes to functions * `as_numeric()` gets a `use.labels`-argument to use value labels as new values if these are numeric. # sjlabelled 1.0.0 ## General * Initial release. All labelled data utility functions from package *sjmisc* have been moved to this package, which is now dedicated to tools for working with labelled data. sjlabelled/MD50000644000176200001440000001046314046513502012667 0ustar liggesusersd7c7d10b8c784a27ff87b3744790c607 *DESCRIPTION f34421ab18ce67c29391010da5e32b20 *NAMESPACE 3317c0998d0e8353ffeee9a9ee298558 *NEWS.md 76cb3acc470f7db52e8db7138ce6579a *R/add_labels.R 1556cb73b7ad284a6196da00f29c7327 *R/as_character.R b9fb975099ca978718d5c47f863bb09a *R/as_factor.R 88466993916cc82f70f2da6d96fde7f4 *R/as_label.R ce99db254be488ab87130a4686f57046 *R/as_labelled.R f7b337c176e1e94e3c015fdaa6e6403d *R/as_numeric.R d7e27e896b7dab5f74838235a841dbb9 *R/convert_case.R 62225e28b461ef57d3aa027a4a6ecee3 *R/copy_labels.R a1cd72e490d95bad1caf256ffb91ba46 *R/drop_labels.R 47d3cf16804c48bce960d6e9a8336c6d *R/efc.R 5901bc71dc6c62779617cacf1bb59598 *R/fill_labels.R 80ebc365782135eee900bf1526cc5f67 *R/get_label.R f7c96d97bba0b36601ed2e09f0d71e02 *R/get_labels.R 0f8d2597581039d507430a8536fb9975 *R/get_model_labels.R 326051d0b07305cb4ef1e63d17167748 *R/get_na.R dc27bca54a80f753ca5916e7169ec8c6 *R/get_values.R a5cb72e330c08320fb5e079e1159d701 *R/helpfunctions.R b92538ef946700cc8976b2f355de3c37 *R/is_labelled.R 0d8c9c1ae768822190c4dd20f9bbd2fa *R/label_to_colnames.R b7baeb8a9f0a38300fcf65e9dc96b9e9 *R/read.R e8bd799c35770d3710a4b78e05bbfb3c *R/remove_all_labels.R c01d7261ff59e67b208c4f0a5a68ecd9 *R/remove_label.R 81e8eccba97e66fa3453e1674da4087f *R/remove_labels.R 4904decea3a629ce2a29cc7a456ba8a9 *R/select_helpers.R 0d304894da3d9bb5d022bb711e51846c *R/set_label.R c6166d2ba739dedced541f512d31345e *R/set_labels.R ddc02cf3ee18ae91b04d835d33abc44b *R/set_na.R 3f711618bb9552d79b7f2a96b09fe919 *R/tidy_labels.R b2d8557f9a7f49203b28394024c69ccd *R/unlabel.R b8b265a4aacad44d37d31f775c0c582f *R/utils_get_dots.R 2c2bcaa5a9fdf910505dfef041bf56e2 *R/val_labels.R a00730235b525119af10271a32124de0 *R/var_labels.R b67c8cfe67c597c3044ca3e2dd91ad5f *R/write.R c6b4f837e217f2516034b1b295a1fdc3 *R/zap_labels.R 75d3802d5a8e944292eabd1b196ae40f *README.md 9386140056c148e26a106d2b68f9783d *build/vignette.rds 3172b22b3d87d0f86d78326bc26891fc *data/efc.RData 5fae7b6b425a9e8629b363da96e51d80 *inst/CITATION b4121120e6d15dcce5d0cfcf3d044e1e *inst/doc/intro_sjlabelled.R 303265b52e36b864e0f12e64fd773011 *inst/doc/intro_sjlabelled.Rmd d2bfe7466e6a33e1ba01711e09bacbea *inst/doc/intro_sjlabelled.html 90ec16c85fcab69e2a651c61c99c7591 *inst/doc/labelleddata.R 13120c4a47121e5f08b0903d8e4a4f5a *inst/doc/labelleddata.Rmd ffa5bca019803921d7f4a6e837231857 *inst/doc/labelleddata.html fdf1cf30c1ee0b160dfe598c41fed16d *inst/doc/quasiquotation.R ec23e6d4cd3097b5fcd0ad0db4904aa4 *inst/doc/quasiquotation.Rmd 4ea04d69608b219b9770d8963a0a1d7b *inst/doc/quasiquotation.html 4a0520850d844e830a1687de7a84f21c *man/add_labels.Rd e7280dcd76e9ce7f026828ef76f89cb8 *man/as_factor.Rd 3ba0570b10a1047648417345795715b5 *man/as_label.Rd 5921a9e983457d106d7d7f8f0f338d0d *man/as_labelled.Rd a1c25faa18ad86f6ac0815d05afc0d95 *man/as_numeric.Rd 16b28e4f39f65ba69cca49c82d6766dd *man/convert_case.Rd d87153a8a6e835b9a1ecbb19fedc1744 *man/copy_labels.Rd 7b767f14249427c725c35c2535d82399 *man/efc.Rd e7cd20460ab87f2c257bd96a478b1d11 *man/figures/logo.png 1c9a746f1dd3c71d1c46cd7177fb3b5d *man/get_label.Rd 081bc532741c9f4eee4b536303470725 *man/get_labels.Rd 5c2ec37af3df61fdb4d736d27c575ec5 *man/get_na.Rd 9dc1c7cb1f34d5faee37273d2d65ed1f *man/get_values.Rd 9d6be024903423247c0f6339f96c70b1 *man/is_labelled.Rd 26a8e815d0a5bc21ec41942741e04c1e *man/label_to_colnames.Rd 07fed19f6c07df56f0bdda35d2fbb46e *man/read_spss.Rd fb9106521718dd54471fde7a602ad409 *man/remove_all_labels.Rd c484d70592715145ccf87dec92bff194 *man/remove_label.Rd 2d0dc52cad0519f3213abafd5a91cd54 *man/set_label.Rd d4143d6e1af82624750f57bbedf7e8de *man/set_labels.Rd 25792b37b5835f3dff9d57473ab8194d *man/set_na.Rd 798a93671e200b674935a501635b46c9 *man/sjlabelled-package.Rd 732ecfdd47b1a6cf197d51c1ecd3594c *man/term_labels.Rd 7c664e598fb6d1854a6242f29cbf3e97 *man/tidy_labels.Rd 47f843337de8955eb8cd032331508cdf *man/unlabel.Rd 2e62c1e44731563258a283532fb2272e *man/write_spss.Rd 91dd4e608b83ae0b866ba97d8f81aaed *man/zap_labels.Rd e4564ba7a7125aa17a25d4e400e6563c *man/zap_na_tags.Rd f8587ac619c437860ebbc497db333de3 *tests/testthat.R 9da10c5d5c57dd43cf39f57c7dc88506 *tests/testthat/test-as_numeric.R f222635816fc2d12909e3f92078c5b7f *tests/testthat/test-remove_labels.R 303265b52e36b864e0f12e64fd773011 *vignettes/intro_sjlabelled.Rmd 13120c4a47121e5f08b0903d8e4a4f5a *vignettes/labelleddata.Rmd ec23e6d4cd3097b5fcd0ad0db4904aa4 *vignettes/quasiquotation.Rmd sjlabelled/inst/0000755000176200001440000000000014046506732013337 5ustar liggesuserssjlabelled/inst/doc/0000755000176200001440000000000014046506732014104 5ustar liggesuserssjlabelled/inst/doc/intro_sjlabelled.R0000644000176200001440000001737214046506730017553 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("magrittr", quietly = TRUE) || !requireNamespace("dplyr", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------- library(haven) x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) print(x) ## ----------------------------------------------------------------------------- is.na(x) as_factor(x) is.na(as_factor(x)) ## ----message=FALSE------------------------------------------------------------ library(sjlabelled) # sjlabelled-sample data, an atomic vector with label attributes data(efc) str(efc$e16sex) ## ----------------------------------------------------------------------------- get_labels(efc$e42dep) ## ----------------------------------------------------------------------------- get_labels(efc$e42dep, values = "p") ## ----------------------------------------------------------------------------- x <- factor(c("low", "mid", "low", "hi", "mid", "low")) get_labels(x) ## ----------------------------------------------------------------------------- x <- factor(c("low", "mid", "low", "hi", "mid", "low")) get_labels(x, attr.only = TRUE) ## ----------------------------------------------------------------------------- # get labels, including tagged NA values x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) get_labels(x) ## ----------------------------------------------------------------------------- get_labels(x, non.labelled = TRUE) ## ----------------------------------------------------------------------------- get_labels(x, values = "n", drop.na = FALSE) ## ----------------------------------------------------------------------------- print(x) get_values(x) ## ----------------------------------------------------------------------------- get_values(x, drop.na = TRUE) ## ----------------------------------------------------------------------------- x <- sample(1:4, 20, replace = TRUE) # return new labelled vector x <- set_labels(x, labels = c("very low", "low", "mid", "hi")) x ## ----------------------------------------------------------------------------- x <- c(2, 2, 3, 3, 2) x <- set_labels(x, labels = c("a", "b", "c")) x ## ----------------------------------------------------------------------------- x <- c(2, 2, 3, 3, 2) x <- set_labels( x, labels = c("a", "b", "c"), force.labels = TRUE ) x ## ----------------------------------------------------------------------------- x <- c(1, 2, 3, 2, 4, NA) x <- set_labels(x, labels = c("yes", "maybe", "no")) x ## ----------------------------------------------------------------------------- x <- c(1, 2, 3, 2, 4, NA) x <- set_labels( x, labels = c("yes", "maybe", "no"), force.values = FALSE ) x ## ----------------------------------------------------------------------------- x <- c(1, 2, 3, 2, 4, 5) x <- set_labels( x, labels = c("strongly agree" = 1, "totally disagree" = 4, "refused" = 5, "missing" = 9) ) x ## ----------------------------------------------------------------------------- tmp <- data.frame( a = c(1, 2, 3), b = c(1, 2, 3), c = c(1, 2, 3) ) labels <- list( c("one", "two", "three"), c("eins", "zwei", "drei"), c("un", "dos", "tres") ) tmp <- set_labels(tmp, labels = labels) str(tmp) ## ----message=FALSE------------------------------------------------------------ library(dplyr) library(sjmisc) # for frq() data(efc) efc %>% select(c82cop1, c83cop2, c84cop3) %>% set_labels(labels = c("not often" = 1, "very often" = 4)) %>% frq() ## ----------------------------------------------------------------------------- get_label(efc$e42dep) get_label(efc, e42dep, e16sex, e15relat) ## ----------------------------------------------------------------------------- dummy <- c(1, 2, 3) testit <- function(x) get_label(x, def.value = deparse(substitute(x))) # returns name of vector, if it has no variable label testit(dummy) ## ----------------------------------------------------------------------------- data(iris) # returns no labels, because iris-data is not labelled get_label(iris) # returns the column name as default labels, if data is not labelled get_label(iris, def.value = colnames(iris)) # labels are parsed in a readable way get_label(iris, def.value = colnames(iris), case = "parsed") ## ----------------------------------------------------------------------------- x <- sample(1:4, 10, replace = TRUE) # return new vector x <- set_label(x, label = "Dummy-variable") str(x) # label existing vector set_label(x) <- "Another Dummy-variable" str(x) ## ----------------------------------------------------------------------------- x <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) x <- set_label(x, label = c("Variable A", "Variable B", "Variable C")) str(x) get_label(x) ## ----------------------------------------------------------------------------- x <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) library(magrittr) # for pipe x %>% var_labels( a = "Variable A", b = "Variable B", c = "Variable C" ) %>% str() ## ----------------------------------------------------------------------------- x <- sample(1:8, 100, replace = TRUE) # show value distribution table(x) # set value 1 and 8 as tagged missings x <- set_na(x, na = c(1, 8), as.tag = TRUE) x # show value distribution, including missings table(x, useNA = "always") # now let's see, which NA's were "1" and which were "8" print_tagged_na(x) x <- factor(c("a", "b", "c")) x # set NA into existing vector x <- set_na(x, na = "b", as.tag = TRUE) x ## ----------------------------------------------------------------------------- get_na(x) ## ----------------------------------------------------------------------------- get_na(x, as.tag = TRUE) ## ----------------------------------------------------------------------------- library(sjmisc) # for replace_na() data(efc) str(efc$c84cop3) efc$c84cop3 <- set_na(efc$c84cop3, na = c(2, 3), as.tag = TRUE) get_na(efc$c84cop3, as.tag = TRUE) # this would replace all NA's into "2" dummy <- replace_na(efc$c84cop3, value = 2) # labels of former tagged NA's are preserved get_labels(dummy, drop.na = FALSE, values = "p") get_na(dummy, as.tag = TRUE) # No more NA values frq(dummy) # In this example, the tagged NA(2) is replaced with value 2 # the new value label for value 2 is "restored NA" dummy <- replace_na(efc$c84cop3, value = 2, na.label = "restored NA", tagged.na = "2") # Only one tagged NA remains get_labels(dummy, drop.na = FALSE, values = "p") get_na(dummy, as.tag = TRUE) # Some NA values remain frq(dummy) ## ----------------------------------------------------------------------------- str(efc$c82cop1) efc$c82cop1 <- set_na(efc$c82cop1, na = c(2, 3), as.tag = TRUE) get_na(efc$c82cop1, as.tag = TRUE) efc$c82cop1 <- replace_labels(efc$c82cop1, labels = c("new NA label" = tagged_na("2"))) get_na(efc$c82cop1, as.tag = TRUE) sjlabelled/inst/doc/labelleddata.Rmd0000644000176200001440000000720714046441130017142 0ustar liggesusers--- title: "Working with Labelled Data" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Working with Labelled Data} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ``` This vignette shows a small example how functions to work with labelled data can be implemented in a typical data visualization workflow. # Labelled Data In software like SPSS, it is common to have value and variable labels as variable attributes. Variable values, even if categorical, are mostly numeric. In R, however, you may use labels as values directly: ```{r} factor(c("low", "high", "mid", "high", "low")) ``` Reading SPSS-data with **haven** or **sjlabelled** keeps the numeric values for variables and adds the value and variable labels as attributes. See following example from the sample-dataset efc, which is part of the **sjlabelled**-package: ```{r} library(sjlabelled) data(efc) str(efc$e42dep) ``` While all plotting and table functions of the [sjPlot-package](https://cran.r-project.org/package=sjPlot) make use of these attributes, many packages and/or functions do not consider these attributes, e.g. R base graphics: ```{r warning=FALSE, fig.height=6, fig.width=7} library(sjlabelled) data(efc) barplot( table(efc$e42dep, efc$e16sex), beside = TRUE, legend.text = TRUE ) ``` As you can see in the above figure, the plot has neither axis nor legend labels. # Adding value labels as factor values `as_label()` is a sjlabelled-function that converts a numeric variable into a factor and sets attribute-value-labels as factor levels. When using factors with valued levels, the bar plot will be labelled. ```{r warning=FALSE, fig.height=6, fig.width=7} barplot( table(sjlabelled::as_label(efc$e42dep), sjlabelled::as_label(efc$e16sex)), beside = TRUE, legend.text = TRUE ) ``` # Getting and setting value and variable labels There are four functions that let you easily set or get value and variable labels of either a single vector or a complete data frame: * `get_label()` to get variable labels * `get_labels()` to get value labels * `set_label()` to set variable labels (add them as vector attribute) * `set_labels()` to set value labels (add them as vector attribute) With this function, you can easily add titles to plots dynamically, i.e. depending on the variable that is plotted. ```{r warning=FALSE, fig.height=6, fig.width=7} barplot( table(sjlabelled::as_label(efc$e42dep), sjlabelled::as_label(efc$e16sex)), beside = TRUE, legend.text = TRUE, main = get_label(efc$e42dep) ) ``` # Restore labels from subsetted data The base `subset()` function drops label attributes (or vector attributes in general) when subsetting data. In the sjlabelled-package, there are handy functions to deal with this problem: `copy_labels()` and `remove_labels()`. `copy_labels()` adds back labels to a subsetted data frame based on the original data frame. And `remove_labels()` removes all label attributes. ## Losing labels during subset ```{r} efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) str(efc.sub) ``` ## Add back labels ```{r, message=FALSE} efc.sub <- copy_labels(efc.sub, efc) str(efc.sub) ``` # Conclusion When working with labelled data, especially when working with data sets imported from other software packages, it comes very handy to make use of the label attributes. The **sjlabelled**-package supports this feature and offers useful functions for these tasks. sjlabelled/inst/doc/labelleddata.html0000644000176200001440000012703214046506731017374 0ustar liggesusers Working with Labelled Data

Working with Labelled Data

Daniel Lüdecke

2021-05-11

This vignette shows a small example how functions to work with labelled data can be implemented in a typical data visualization workflow.

Labelled Data

In software like SPSS, it is common to have value and variable labels as variable attributes. Variable values, even if categorical, are mostly numeric. In R, however, you may use labels as values directly:

factor(c("low", "high", "mid", "high", "low"))
#> [1] low  high mid  high low 
#> Levels: high low mid

Reading SPSS-data with haven or sjlabelled keeps the numeric values for variables and adds the value and variable labels as attributes. See following example from the sample-dataset efc, which is part of the sjlabelled-package:

library(sjlabelled)
data(efc)
str(efc$e42dep)
#>  num [1:908] 3 3 3 4 4 4 4 4 4 4 ...
#>  - attr(*, "label")= chr "elder's dependency"
#>  - attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   ..- attr(*, "names")= chr [1:4] "independent" "slightly dependent" "moderately dependent" "severely dependent"

While all plotting and table functions of the sjPlot-package make use of these attributes, many packages and/or functions do not consider these attributes, e.g. R base graphics:

library(sjlabelled)
data(efc)
barplot(
  table(efc$e42dep, efc$e16sex), 
  beside = TRUE, 
  legend.text = TRUE
)

As you can see in the above figure, the plot has neither axis nor legend labels.

Adding value labels as factor values

as_label() is a sjlabelled-function that converts a numeric variable into a factor and sets attribute-value-labels as factor levels. When using factors with valued levels, the bar plot will be labelled.

barplot(
  table(sjlabelled::as_label(efc$e42dep),
        sjlabelled::as_label(efc$e16sex)), 
  beside = TRUE, 
  legend.text = TRUE
)

Getting and setting value and variable labels

There are four functions that let you easily set or get value and variable labels of either a single vector or a complete data frame:

  • get_label() to get variable labels
  • get_labels() to get value labels
  • set_label() to set variable labels (add them as vector attribute)
  • set_labels() to set value labels (add them as vector attribute)

With this function, you can easily add titles to plots dynamically, i.e. depending on the variable that is plotted.

barplot(
  table(sjlabelled::as_label(efc$e42dep),
        sjlabelled::as_label(efc$e16sex)), 
  beside = TRUE, 
  legend.text = TRUE,
  main = get_label(efc$e42dep)
)

Restore labels from subsetted data

The base subset() function drops label attributes (or vector attributes in general) when subsetting data. In the sjlabelled-package, there are handy functions to deal with this problem: copy_labels() and remove_labels().

copy_labels() adds back labels to a subsetted data frame based on the original data frame. And remove_labels() removes all label attributes.

Losing labels during subset

efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8))
str(efc.sub)
#> 'data.frame':    296 obs. of  5 variables:
#>  $ e17age : num  74 68 80 72 94 79 67 80 76 88 ...
#>  $ e42dep : num  4 4 1 3 3 4 3 4 2 4 ...
#>  $ c82cop1: num  4 3 3 4 3 3 4 2 2 3 ...
#>  $ c83cop2: num  2 4 2 2 2 2 1 3 2 2 ...
#>  $ c84cop3: num  4 4 1 1 1 4 2 4 2 4 ...

Add back labels

efc.sub <- copy_labels(efc.sub, efc)
str(efc.sub)
#> 'data.frame':    296 obs. of  5 variables:
#>  $ e17age : num  74 68 80 72 94 79 67 80 76 88 ...
#>   ..- attr(*, "label")= chr "elder' age"
#>  $ e42dep : num  4 4 1 3 3 4 3 4 2 4 ...
#>   ..- attr(*, "label")= chr "elder's dependency"
#>   ..- attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   .. ..- attr(*, "names")= chr [1:4] "independent" "slightly dependent" "moderately dependent" "severely dependent"
#>  $ c82cop1: num  4 3 3 4 3 3 4 2 2 3 ...
#>   ..- attr(*, "label")= chr "do you feel you cope well as caregiver?"
#>   ..- attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   .. ..- attr(*, "names")= chr [1:4] "never" "sometimes" "often" "always"
#>  $ c83cop2: num  2 4 2 2 2 2 1 3 2 2 ...
#>   ..- attr(*, "label")= chr "do you find caregiving too demanding?"
#>   ..- attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   .. ..- attr(*, "names")= chr [1:4] "Never" "Sometimes" "Often" "Always"
#>  $ c84cop3: num  4 4 1 1 1 4 2 4 2 4 ...
#>   ..- attr(*, "label")= chr "does caregiving cause difficulties in your relationship with your friends?"
#>   ..- attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   .. ..- attr(*, "names")= chr [1:4] "Never" "Sometimes" "Often" "Always"

Conclusion

When working with labelled data, especially when working with data sets imported from other software packages, it comes very handy to make use of the label attributes. The sjlabelled-package supports this feature and offers useful functions for these tasks.

sjlabelled/inst/doc/quasiquotation.html0000644000176200001440000010776414046506732020077 0ustar liggesusers Using quasiquotation to add variable and value labels

Using quasiquotation to add variable and value labels

Daniel Lüdecke

2021-05-11

Labelling data is typically a task for end-users and is applied in own scripts or functions rather than in packages. However, sometimes it can be useful for both end-users and package developers to have a flexible way to add variable and value labels to their data. In such cases, quasiquotation is helpful.

This vignette demonstrate how to use quasiquotation in sjlabelled to label your data.

Adding value labels to variables using quasiquotation

Usually, set_labels() can be used to add value labels to variables. The syntax of this function is easy to use, and set_labels() allows to add value labels to multiple variables at once, if these variables share the same value labels.

In the following examples, we will use the frq() function, that shows an extra label-column containing value labels, if the data is labelled. If the data has no value labels, this column is not shown in the output.

library(sjlabelled)
library(sjmisc) # for frq()-function
library(rlang)

# unlabelled data
dummies <- data.frame(
  dummy1 = sample(1:3, 40, replace = TRUE),
  dummy2 = sample(1:3, 40, replace = TRUE),
  dummy3 = sample(1:3, 40, replace = TRUE)
)

# set labels for all variables in the data frame
test <- set_labels(dummies, labels = c("low", "mid", "hi"))

attr(test$dummy1, "labels")
#> low mid  hi 
#>   1   2   3

frq(test, dummy1)
#> 
#> dummy1 <integer>
#> # total N=40  valid N=40  mean=2.10  sd=0.84
#> 
#> Value | Label |  N | Raw % | Valid % | Cum. %
#> ---------------------------------------------
#>     1 |   low | 12 |    30 |      30 |     30
#>     2 |   mid | 12 |    30 |      30 |     60
#>     3 |    hi | 16 |    40 |      40 |    100
#>  <NA> |  <NA> |  0 |     0 |    <NA> |   <NA>

# and set same value labels for two of three variables
test <- set_labels(
  dummies, dummy1, dummy2,
  labels = c("low", "mid", "hi")
)

frq(test)
#> 
#> dummy1 <integer>
#> # total N=40  valid N=40  mean=2.10  sd=0.84
#> 
#> Value | Label |  N | Raw % | Valid % | Cum. %
#> ---------------------------------------------
#>     1 |   low | 12 |    30 |      30 |     30
#>     2 |   mid | 12 |    30 |      30 |     60
#>     3 |    hi | 16 |    40 |      40 |    100
#>  <NA> |  <NA> |  0 |     0 |    <NA> |   <NA>
#> 
#> 
#> dummy2 <integer>
#> # total N=40  valid N=40  mean=1.92  sd=0.80
#> 
#> Value | Label |  N | Raw % | Valid % | Cum. %
#> ---------------------------------------------
#>     1 |   low | 14 | 35.00 |   35.00 |  35.00
#>     2 |   mid | 15 | 37.50 |   37.50 |  72.50
#>     3 |    hi | 11 | 27.50 |   27.50 | 100.00
#>  <NA> |  <NA> |  0 |  0.00 |    <NA> |   <NA>
#> 
#> 
#> dummy3 <integer>
#> # total N=40  valid N=40  mean=1.98  sd=0.83
#> 
#> Value |  N | Raw % | Valid % | Cum. %
#> -------------------------------------
#>     1 | 14 | 35.00 |   35.00 |  35.00
#>     2 | 13 | 32.50 |   32.50 |  67.50
#>     3 | 13 | 32.50 |   32.50 | 100.00
#>  <NA> |  0 |  0.00 |    <NA> |   <NA>

val_labels() does the same job as set_labels(), but in a different way. While set_labels() requires variables to be specified in the ...-argument, and labels in the labels-argument, val_labels() requires both to be specified in the ....

val_labels() requires named vectors as argument, with the left-hand side being the name of the variable that should be labelled, and the right-hand side containing the labels for the values.

test <- val_labels(dummies, dummy1 = c("low", "mid", "hi"))
attr(test$dummy1, "labels")
#> low mid  hi 
#>   1   2   3

# remaining variables are not labelled
frq(test)
#> 
#> dummy1 <integer>
#> # total N=40  valid N=40  mean=2.10  sd=0.84
#> 
#> Value | Label |  N | Raw % | Valid % | Cum. %
#> ---------------------------------------------
#>     1 |   low | 12 |    30 |      30 |     30
#>     2 |   mid | 12 |    30 |      30 |     60
#>     3 |    hi | 16 |    40 |      40 |    100
#>  <NA> |  <NA> |  0 |     0 |    <NA> |   <NA>
#> 
#> 
#> dummy2 <integer>
#> # total N=40  valid N=40  mean=1.92  sd=0.80
#> 
#> Value |  N | Raw % | Valid % | Cum. %
#> -------------------------------------
#>     1 | 14 | 35.00 |   35.00 |  35.00
#>     2 | 15 | 37.50 |   37.50 |  72.50
#>     3 | 11 | 27.50 |   27.50 | 100.00
#>  <NA> |  0 |  0.00 |    <NA> |   <NA>
#> 
#> 
#> dummy3 <integer>
#> # total N=40  valid N=40  mean=1.98  sd=0.83
#> 
#> Value |  N | Raw % | Valid % | Cum. %
#> -------------------------------------
#>     1 | 14 | 35.00 |   35.00 |  35.00
#>     2 | 13 | 32.50 |   32.50 |  67.50
#>     3 | 13 | 32.50 |   32.50 | 100.00
#>  <NA> |  0 |  0.00 |    <NA> |   <NA>

Unlike set_labels(), val_labels() allows the user to add different value labels to different variables in one function call. Another advantage, or difference, of val_labels() is it’s flexibility in defining variable names and value labels by using quasiquotation.

Add labels that are stored in a vector

To use quasiquotation, we need the rlang package to be installed and loaded. Now we can have labels in a character vector, and use !! to unquote this vector.

labels <- c("low_quote", "mid_quote", "hi_quote")
test <- val_labels(dummies, dummy1 = !! labels)
attr(test$dummy1, "labels")
#> low_quote mid_quote  hi_quote 
#>         1         2         3

Define variable names that are stored in a vector

The same can be done with the names of variables that should get new value labels. We then need !! to unquote the variable name and := as assignment.

variable <- "dummy2"
test <- val_labels(dummies, !! variable := c("lo_var", "mid_var", "high_var"))

# no value labels
attr(test$dummy1, "labels")
#> NULL

# value labels
attr(test$dummy2, "labels")
#>   lo_var  mid_var high_var 
#>        1        2        3

Both variable names and value labels are stored in a vector

Finally, we can combine the above approaches to be flexible regarding both variable names and value labels.

variable <- "dummy3"
labels <- c("low", "mid", "hi")
test <- val_labels(dummies, !! variable := !! labels)
attr(test$dummy3, "labels")
#> low mid  hi 
#>   1   2   3

Adding variable labels using quasiquotation

set_label() is the equivalent to set_labels() to add variable labels to a variable. The equivalent to val_labels() is var_labels(), which works in the same way as val_labels(). In case of variable labels, a label-attribute is added to a vector or factor (instead of a labels-attribute, which is used for value labels).

The following examples show how to use var_labels() to add variable labels to the data. We demonstrate this function without further explanation, because it is actually very similar to val_labels().

dummy <- data.frame(
  a = sample(1:4, 10, replace = TRUE),
  b = sample(1:4, 10, replace = TRUE),
  c = sample(1:4, 10, replace = TRUE)
)

# simple usage
test <- var_labels(dummy, a = "first variable", c = "third variable")

attr(test$a, "label")
#> [1] "first variable"
attr(test$b, "label")
#> NULL
attr(test$c, "label")
#> [1] "third variable"

# quasiquotation for labels
v1 <- "First variable"
v2 <- "Second variable"
test <- var_labels(dummy, a = !! v1, b = !! v2)

attr(test$a, "label")
#> [1] "First variable"
attr(test$b, "label")
#> [1] "Second variable"
attr(test$c, "label")
#> NULL

# quasiquotation for variable names
x1 <- "a"
x2 <- "c"
test <- var_labels(dummy, !! x1 := "First", !! x2 := "Second")

attr(test$a, "label")
#> [1] "First"
attr(test$b, "label")
#> NULL
attr(test$c, "label")
#> [1] "Second"

# quasiquotation for both variable names and labels
test <- var_labels(dummy, !! x1 := !! v1, !! x2 := !! v2)

attr(test$a, "label")
#> [1] "First variable"
attr(test$b, "label")
#> NULL
attr(test$c, "label")
#> [1] "Second variable"

Conclusion

As we have demonstrated, var_labels() and val_labels() are one of the most flexible and easy-to-use ways to add value and variable labels to our data. Another advantage is the consistent design of all functions in sjlabelled, which allows seamless integration into pipe-workflows.

sjlabelled/inst/doc/labelleddata.R0000644000176200001440000000256014046506731016627 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") ## ----------------------------------------------------------------------------- factor(c("low", "high", "mid", "high", "low")) ## ----------------------------------------------------------------------------- library(sjlabelled) data(efc) str(efc$e42dep) ## ----warning=FALSE, fig.height=6, fig.width=7--------------------------------- library(sjlabelled) data(efc) barplot( table(efc$e42dep, efc$e16sex), beside = TRUE, legend.text = TRUE ) ## ----warning=FALSE, fig.height=6, fig.width=7--------------------------------- barplot( table(sjlabelled::as_label(efc$e42dep), sjlabelled::as_label(efc$e16sex)), beside = TRUE, legend.text = TRUE ) ## ----warning=FALSE, fig.height=6, fig.width=7--------------------------------- barplot( table(sjlabelled::as_label(efc$e42dep), sjlabelled::as_label(efc$e16sex)), beside = TRUE, legend.text = TRUE, main = get_label(efc$e42dep) ) ## ----------------------------------------------------------------------------- efc.sub <- subset(efc, subset = e16sex == 1, select = c(4:8)) str(efc.sub) ## ---- message=FALSE----------------------------------------------------------- efc.sub <- copy_labels(efc.sub, efc) str(efc.sub) sjlabelled/inst/doc/intro_sjlabelled.html0000644000176200001440000023365114046506731020317 0ustar liggesusers Labelled Data and the sjlabelled-Package

Labelled Data and the sjlabelled-Package

Daniel Lüdecke

2021-05-11

This package provides functions to read and write data between R and other statistical software packages like SPSS, SAS or Stata and to work with labelled data; this includes easy ways to get and set label attributes, to convert labelled vectors into factors (and vice versa), or to deal with multiple declared missing values etc.

This vignette gives an overview of functions to work with labelled data.

Labelled Data

Labelled data (or labelled vectors) is a common data structure in other statistical environments to store meta-information about variables, like variable names, value labels or multiple defined missing values.

Labelled data not only extends R’s capabilities to deal with proper value and variable labels, but also facilitates the representation of different types of missing values, like in other statistical software packages. Typically, in R, multiple declared missings cannot be represented in a similar way, like in ‘SPSS’ or ‘SAS’, with the regular missing values. However, the haven-package introduced tagged_na values, which can do this. Tagged NA’s work exactly like regular R missing values except that they store one additional byte of information: a tag, which is usually a letter (“a” to “z”) or also may be a character number (“0” to “9”). This allows to indicate different missings.

Functions of sjlabelled do not necessarily require vectors of class labelled or haven_labelled. The labelled class, implemented by the packages haven and labelled, may cause troubles with other packages, thus it’s only intended as being an intermediate data structure that should be converted to common R classes. However, coercing a labelled vector to other classes (like factor or numeric) typically means that meta information like value and variable label attributes are lost. Actually, there is no need to drop these attributes for non-labelled-class vectors. Functions like lm() simply copy these attributes to the data that is included in the returned object. Packages like sjPlot support labelled data for easily annotated data visualization. sjlabelled supports working with labelled data and offers functions to benefit from these features.

Note: Since package-version 2.0 of the haven-package, the labelled-class attribute was changed to haven_labelled, to avoid interferences with the Hmisc-package.

Labelled Data in haven and labelled

The labelled-package is intended to support labelled / haven_labelled metadata structures, thus the data structure of labelled vectors in haven and labelled is the same.

Labelled data in this format stores information about value labels, variable names and multiple defined missing values. However, variable names are only part of this information if data was imported with one of haven’s read-functions. Adding a variable label attribute is (at least up to version 1.0.0) not possible via the labelled()-constructor method.

library(haven)
x <- labelled(
  c(1:3, tagged_na("a", "c", "z"), 4:1),
  c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"),
    "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))
  )

print(x)
#> <labelled<double>[10]>
#>  [1]     1     2     3 NA(a) NA(c) NA(z)     4     3     2     1
#> 
#> Labels:
#>  value        label
#>      1    Agreement
#>      4 Disagreement
#>  NA(c)        First
#>  NA(a)      Refused
#>  NA(z)     Not home

A labelled vector can either be a numeric or character vector. Conversion to factors copies the value labels as factor levels, but drops the label attributes and missing information:

is.na(x)
#>  [1] FALSE FALSE FALSE  TRUE  TRUE  TRUE FALSE FALSE FALSE FALSE

as_factor(x)
#>  [1] Agreement    2            3            Refused      First       
#>  [6] Not home     Disagreement 3            2            Agreement   
#> Levels: Agreement 2 3 Disagreement Refused First Not home

is.na(as_factor(x))
#>  [1] FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE FALSE

Labelled Data in sjlabelled

sjlabelled supports label attributes in haven-style (label and labels). You’re not restricted to the labelled class for vectors when working with sjlabelled and labelled data. Hence, you can have vectors of common R classes and still use information like variable or value labels.

library(sjlabelled)
# sjlabelled-sample data, an atomic vector with label attributes
data(efc)
str(efc$e16sex)
#>  num [1:908] 2 2 2 2 2 2 1 2 2 2 ...
#>  - attr(*, "label")= chr "elder's gender"
#>  - attr(*, "labels")= Named num [1:2] 1 2
#>   ..- attr(*, "names")= chr [1:2] "male" "female"

Value Labels

Getting value labels

The get_labels()-method is a generic method to return value labels of a vector or data frame.

get_labels(efc$e42dep)
#> [1] "independent"          "slightly dependent"   "moderately dependent"
#> [4] "severely dependent"

You can prefix the value labels with the associated values or return them as named vector with the values argument.

get_labels(efc$e42dep, values = "p")
#> [1] "[1] independent"          "[2] slightly dependent"  
#> [3] "[3] moderately dependent" "[4] severely dependent"

get_labels() also returns “labels” of factors, even if the factor has no label attributes.

x <- factor(c("low", "mid", "low", "hi", "mid", "low"))
get_labels(x)
#> [1] "hi"  "low" "mid"

To ensure that labels are only returned for vectors with label-attribute, use the attr.only argument.

x <- factor(c("low", "mid", "low", "hi", "mid", "low"))
get_labels(x, attr.only = TRUE)
#> NULL

If a vector has a label attribute, only these labels are returned. Non-labelled values are excluded from the output by default…

# get labels, including tagged NA values
x <- labelled(
  c(1:3, tagged_na("a", "c", "z"), 4:1),
  c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"),
    "Refused" = tagged_na("a"), "Not home" = tagged_na("z"))
)
get_labels(x)
#> [1] "Agreement"    "Disagreement"

… however, you can add non-labelled values to the return value as well, using the non.labelled argument.

get_labels(x, non.labelled = TRUE)
#> [1] "Agreement"    "2"            "3"            "Disagreement"

Tagged missing values can also be included in the output, using the drop.na argument.

get_labels(x, values = "n", drop.na = FALSE)
#>              1              4          NA(c)          NA(a)          NA(z) 
#>    "Agreement" "Disagreement"        "First"      "Refused"     "Not home"

Getting labelled values

The get_values() method returns the values for labelled values (i.e. values that have an associated label). We still use the vector x from the above examples.

print(x)
#> <labelled<double>[10]>
#>  [1]     1     2     3 NA(a) NA(c) NA(z)     4     3     2     1
#> 
#> Labels:
#>  value        label
#>      1    Agreement
#>      4 Disagreement
#>  NA(c)        First
#>  NA(a)      Refused
#>  NA(z)     Not home

get_values(x)
#> [1] "1"     "4"     "NA(a)" "NA(c)" "NA(z)"

With the drop.na argument you can omit those values from the return values that are defined as missing.

get_values(x, drop.na = TRUE)
#> [1] 1 4

Setting value labels

With set_labels() you can add label attributes to any vector.

x <- sample(1:4, 20, replace = TRUE)

# return new labelled vector
x <- set_labels(x, labels = c("very low", "low", "mid", "hi"))
x
#>  [1] 2 1 1 3 4 1 1 1 1 2 3 4 4 2 3 4 1 1 4 2
#> attr(,"labels")
#> very low      low      mid       hi 
#>        1        2        3        4

If more labels than values are given, only as many labels elements are used as values are present.

x <- c(2, 2, 3, 3, 2)
x <- set_labels(x, labels = c("a", "b", "c"))
#> More labels than values of "x". Using first 2 labels.
x
#> [1] 2 2 3 3 2
#> attr(,"labels")
#> a b 
#> 2 3

However, you can force to use all labels, even for values that are not in the vector, using the force.labels argument.

x <- c(2, 2, 3, 3, 2)
x <- set_labels(
  x, 
  labels = c("a", "b", "c"), 
  force.labels = TRUE
)
x
#> [1] 2 2 3 3 2
#> attr(,"labels")
#> a b c 
#> 1 2 3

For vectors with more unique values than labels, additional labels for non-labelled values are added.

x <- c(1, 2, 3, 2, 4, NA)
x <- set_labels(x, labels = c("yes", "maybe", "no"))
#> More values in "x" than length of "labels". Additional values were added to labels.
x
#> [1]  1  2  3  2  4 NA
#> attr(,"labels")
#>   yes maybe    no     4 
#>     1     2     3     4

Use force.values to add only those labels that have been passed as argument.

x <- c(1, 2, 3, 2, 4, NA)
x <- set_labels(
  x, 
  labels = c("yes", "maybe", "no"),
  force.values = FALSE
)
#> "x" has more values than "labels", hence not all values are labelled.
x
#> [1]  1  2  3  2  4 NA
#> attr(,"labels")
#>   yes maybe    no 
#>     1     2     3

To add explicit labels for values (without adding more labels than wanted and without dropping labels for values that do not appear in the vector), use a named vector of labels as argument. The arguments force.values and force.labels are ignored when using named vectors.

x <- c(1, 2, 3, 2, 4, 5)
x <- set_labels(
  x, 
  labels = c("strongly agree" = 1, 
             "totally disagree" = 4, 
             "refused" = 5,
             "missing" = 9)
)
x
#> [1] 1 2 3 2 4 5
#> attr(,"labels")
#>   strongly agree totally disagree          refused          missing 
#>                1                4                5                9

If you want to set different value labels for a complete data frame, if you provide the labels as a list. For each variable in the data frame, provide a list element with value labels as character vector. Note that the length of the list must be equal to the number of variables (columns) in the data frame.

tmp <- data.frame(
  a = c(1, 2, 3),
  b = c(1, 2, 3),
  c = c(1, 2, 3)
)

labels <- list(
  c("one", "two", "three"),
  c("eins", "zwei", "drei"),
  c("un", "dos", "tres")
)

tmp <- set_labels(tmp, labels = labels)
str(tmp)
#> 'data.frame':    3 obs. of  3 variables:
#>  $ a: num  1 2 3
#>   ..- attr(*, "labels")= Named num [1:3] 1 2 3
#>   .. ..- attr(*, "names")= chr [1:3] "one" "two" "three"
#>  $ b: num  1 2 3
#>   ..- attr(*, "labels")= Named num [1:3] 1 2 3
#>   .. ..- attr(*, "names")= chr [1:3] "eins" "zwei" "drei"
#>  $ c: num  1 2 3
#>   ..- attr(*, "labels")= Named num [1:3] 1 2 3
#>   .. ..- attr(*, "names")= chr [1:3] "un" "dos" "tres"

You can use set_labels() within a pipe-workflow with dplyr.

library(dplyr)
library(sjmisc) # for frq()
data(efc)

efc %>% 
  select(c82cop1, c83cop2, c84cop3) %>% 
  set_labels(labels = c("not often" = 1, "very often" = 4)) %>% 
  frq()
#> 
#> do you feel you cope well as caregiver? (c82cop1) <numeric>
#> # total N=908  valid N=901  mean=3.12  sd=0.58
#> 
#> Value |      Label |   N | Raw % | Valid % | Cum. %
#> ---------------------------------------------------
#>     1 |  not often |   3 |  0.33 |    0.33 |   0.33
#>     2 |          2 |  97 | 10.68 |   10.77 |  11.10
#>     3 |          3 | 591 | 65.09 |   65.59 |  76.69
#>     4 | very often | 210 | 23.13 |   23.31 | 100.00
#>  <NA> |       <NA> |   7 |  0.77 |    <NA> |   <NA>
#> 
#> 
#> do you find caregiving too demanding? (c83cop2) <numeric>
#> # total N=908  valid N=902  mean=2.02  sd=0.72
#> 
#> Value |      Label |   N | Raw % | Valid % | Cum. %
#> ---------------------------------------------------
#>     1 |  not often | 186 | 20.48 |   20.62 |  20.62
#>     2 |          2 | 547 | 60.24 |   60.64 |  81.26
#>     3 |          3 | 130 | 14.32 |   14.41 |  95.68
#>     4 | very often |  39 |  4.30 |    4.32 | 100.00
#>  <NA> |       <NA> |   6 |  0.66 |    <NA> |   <NA>
#> 
#> 
#> does caregiving cause difficulties in your relationship with your friends? (c84cop3) <numeric>
#> # total N=908  valid N=902  mean=1.63  sd=0.87
#> 
#> Value |      Label |   N | Raw % | Valid % | Cum. %
#> ---------------------------------------------------
#>     1 |  not often | 516 | 56.83 |   57.21 |  57.21
#>     2 |          2 | 252 | 27.75 |   27.94 |  85.14
#>     3 |          3 |  82 |  9.03 |    9.09 |  94.24
#>     4 | very often |  52 |  5.73 |    5.76 | 100.00
#>  <NA> |       <NA> |   6 |  0.66 |    <NA> |   <NA>

Variable Labels

Getting variable labels

The get_label()-method returns the variable label of a vector or all variable labels from a data frame.

get_label(efc$e42dep)
#> [1] "elder's dependency"

get_label(efc, e42dep, e16sex, e15relat)
#>                  e42dep                  e16sex                e15relat 
#>    "elder's dependency"        "elder's gender" "relationship to elder"

If a vector has no variable label, NULL is returned. However, get_label() also allows returning a standard value instead of NULL, in case the vector has no label attribute. This is useful to combine with deparse(substitute()) in function calls, so - for instance - the name of the vector can be used as default value if no variable labels are present.

dummy <- c(1, 2, 3)
testit <- function(x) get_label(x, def.value = deparse(substitute(x)))
# returns name of vector, if it has no variable label
testit(dummy)
#> [1] "dummy"

If you want human-readable labels, you can use the case-argument, which will pass the labels to a string parser in the snakecase-package.

data(iris)

# returns no labels, because iris-data is not labelled
get_label(iris)
#> Sepal.Length  Sepal.Width Petal.Length  Petal.Width      Species 
#>           ""           ""           ""           ""           ""

# returns the column name as default labels, if data is not labelled
get_label(iris, def.value = colnames(iris))
#>   Sepal.Length    Sepal.Width   Petal.Length    Petal.Width        Species 
#> "Sepal.Length"  "Sepal.Width" "Petal.Length"  "Petal.Width"      "Species"

# labels are parsed in a readable way
get_label(iris, def.value = colnames(iris), case = "parsed")
#>   Sepal.Length    Sepal.Width   Petal.Length    Petal.Width        Species 
#> "Sepal Length"  "Sepal Width" "Petal Length"  "Petal Width"      "Species"

Setting variable labels

The set_label() function adds the variable label attribute to a vector. You can either return a new vector, or label an existing vector

x <- sample(1:4, 10, replace = TRUE)

# return new vector
x <- set_label(x, label = "Dummy-variable")
str(x)
#>  int [1:10] 1 4 3 2 1 1 1 3 2 3
#>  - attr(*, "label")= chr "Dummy-variable"

# label existing vector
set_label(x) <- "Another Dummy-variable"
str(x)
#>  int [1:10] 1 4 3 2 1 1 1 3 2 3
#>  - attr(*, "label")= chr "Another Dummy-variable"

set_label() can also set variable labels for a data frame. In this case, the variable attributes get an additional name attribute with the vector’s name. This makes it easier to see which label belongs to which vector.

x <- data.frame(
  a = sample(1:4, 10, replace = TRUE),
  b = sample(1:4, 10, replace = TRUE),
  c = sample(1:4, 10, replace = TRUE)
)
x <- set_label(x, label = c("Variable A",
                            "Variable B",
                            "Variable C"))

str(x)
#> 'data.frame':    10 obs. of  3 variables:
#>  $ a: int  4 3 1 4 4 3 2 4 3 4
#>   ..- attr(*, "label")= Named chr "Variable A"
#>   .. ..- attr(*, "names")= chr "a"
#>  $ b: int  4 4 2 1 4 2 2 3 2 2
#>   ..- attr(*, "label")= Named chr "Variable B"
#>   .. ..- attr(*, "names")= chr "b"
#>  $ c: int  4 1 2 4 3 1 2 3 4 2
#>   ..- attr(*, "label")= Named chr "Variable C"
#>   .. ..- attr(*, "names")= chr "c"

get_label(x)
#>            a            b            c 
#> "Variable A" "Variable B" "Variable C"

An alternative to set_label() is var_labels(), which also works within pipe-workflows. var_labels() requires named vectors as arguments to match the column names of the input, and set the associated variable labels.

x <- data.frame(
  a = sample(1:4, 10, replace = TRUE),
  b = sample(1:4, 10, replace = TRUE),
  c = sample(1:4, 10, replace = TRUE)
)

library(magrittr) # for pipe
x %>% 
  var_labels(
    a = "Variable A",
    b = "Variable B",
    c = "Variable C"
  ) %>% 
  str()
#> 'data.frame':    10 obs. of  3 variables:
#>  $ a: int  1 2 3 3 1 2 2 2 2 4
#>   ..- attr(*, "label")= chr "Variable A"
#>  $ b: int  3 1 1 2 4 2 3 1 3 4
#>   ..- attr(*, "label")= chr "Variable B"
#>  $ c: int  1 1 3 2 2 2 1 1 2 1
#>   ..- attr(*, "label")= chr "Variable C"

Missing Values

Defining missing values

set_na() converts values of a vector or of multiple vectors in a data frame into NAs. With as.tag = TRUE, set_na() creates tagged NA values, which means that these missing values get an information tag and a value label (which is, by default, the former value that was converted to NA). You can either return a new vector/data frame, or set NAs into an existing vector/data frame.

x <- sample(1:8, 100, replace = TRUE)
# show value distribution
table(x)
#> x
#>  1  2  3  4  5  6  7  8 
#> 15 15  8 15  9 10 18 10

# set value 1 and 8 as tagged missings
x <- set_na(x, na = c(1, 8), as.tag = TRUE)
x
#>   [1] NA  6  4  6  5  2  7 NA  7  4  7 NA NA  4  5  2 NA NA NA  4  2  2  4  4 NA
#>  [26]  7  7  2 NA  4  3  3  2 NA  2 NA  4  3  4  4  3  2  4  5  2  5  5  7  6  5
#>  [51]  5  2  7 NA  7 NA NA  6  4  6  2 NA  4  7  7 NA  3 NA  7  2  6 NA  3 NA  7
#>  [76]  5  3 NA  6  7  3  4  2 NA  7  5 NA NA NA NA  7  6  2  6  6  7  2  7  4  7
#> attr(,"labels")
#>  1  8 
#> NA NA

# show value distribution, including missings
table(x, useNA = "always")
#> x
#>    2    3    4    5    6    7 <NA> 
#>   15    8   15    9   10   18   25

# now let's see, which NA's were "1" and which were "8"
print_tagged_na(x)
#>   [1] NA(1)     6     4     6     5     2     7 NA(1)     7     4     7 NA(1)
#>  [13] NA(1)     4     5     2 NA(1) NA(8) NA(8)     4     2     2     4     4
#>  [25] NA(1)     7     7     2 NA(1)     4     3     3     2 NA(1)     2 NA(1)
#>  [37]     4     3     4     4     3     2     4     5     2     5     5     7
#>  [49]     6     5     5     2     7 NA(1)     7 NA(8) NA(8)     6     4     6
#>  [61]     2 NA(1)     4     7     7 NA(1)     3 NA(1)     7     2     6 NA(1)
#>  [73]     3 NA(8)     7     5     3 NA(8)     6     7     3     4     2 NA(1)
#>  [85]     7     5 NA(8) NA(8) NA(8) NA(8)     7     6     2     6     6     7
#>  [97]     2     7     4     7

x <- factor(c("a", "b", "c"))
x
#> [1] a b c
#> Levels: a b c

# set NA into existing vector
x <- set_na(x, na = "b", as.tag = TRUE)
x
#> [1] a    <NA> c   
#> attr(,"labels")
#>  b 
#> NA 
#> Levels: a c

Getting missing values

The get_na() function returns all tagged NA values. We still use the vector x from the previous example.

get_na(x)
#>  b 
#> NA

To see the tags of the NA values, use the as.tag argument.

get_na(x, as.tag = TRUE)
#>       b 
#> "NA(b)"

Replacing specific NA with values

While set_na() allows you to replace values with (tagged) NA’s, replace_na() (from package sjmisc) allows you to replace either all NA values of a vector or specific tagged NA values with a non-NA value.

library(sjmisc) # for replace_na()
data(efc)
str(efc$c84cop3)
#>  num [1:908] 2 3 1 3 1 3 4 2 3 1 ...
#>  - attr(*, "label")= chr "does caregiving cause difficulties in your relationship with your friends?"
#>  - attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   ..- attr(*, "names")= chr [1:4] "Never" "Sometimes" "Often" "Always"

efc$c84cop3 <- set_na(efc$c84cop3, na = c(2, 3), as.tag = TRUE)
get_na(efc$c84cop3, as.tag = TRUE)
#> Sometimes     Often 
#>   "NA(2)"   "NA(3)"

# this would replace all NA's into "2"
dummy <- replace_na(efc$c84cop3, value = 2)

# labels of former tagged NA's are preserved
get_labels(dummy, drop.na = FALSE, values = "p")
#> [1] "[1] Never"         "[4] Always"        "[NA(2)] Sometimes"
#> [4] "[NA(3)] Often"
get_na(dummy, as.tag = TRUE)
#> Sometimes     Often 
#>   "NA(2)"   "NA(3)"

# No more NA values
frq(dummy)
#> 
#> does caregiving cause difficulties in your relationship with your friends? (x) <numeric>
#> # total N=908  valid N=908  mean=1.55  sd=0.77
#> 
#> Value |  Label |   N | Raw % | Valid % | Cum. %
#> -----------------------------------------------
#>     1 |  Never | 516 | 56.83 |   56.83 |  56.83
#>     2 |      2 | 340 | 37.44 |   37.44 |  94.27
#>     4 | Always |  52 |  5.73 |    5.73 | 100.00
#>  <NA> |   <NA> |   0 |  0.00 |    <NA> |   <NA>


# In this example, the tagged NA(2) is replaced with value 2
# the new value label for value 2 is "restored NA"
dummy <- replace_na(efc$c84cop3, value = 2, na.label = "restored NA", tagged.na = "2")

# Only one tagged NA remains
get_labels(dummy, drop.na = FALSE, values = "p")
#> [1] "[1] Never"       "[2] restored NA" "[4] Always"      "[NA(3)] Often"
get_na(dummy, as.tag = TRUE)
#>   Often 
#> "NA(3)"

# Some NA values remain
frq(dummy)
#> 
#> does caregiving cause difficulties in your relationship with your friends? (x) <numeric>
#> # total N=908  valid N=820  mean=1.50  sd=0.79
#> 
#> Value |       Label |   N | Raw % | Valid % | Cum. %
#> ----------------------------------------------------
#>     1 |       Never | 516 | 56.83 |   62.93 |  62.93
#>     2 | restored NA | 252 | 27.75 |   30.73 |  93.66
#>     4 |      Always |  52 |  5.73 |    6.34 | 100.00
#>  <NA> |        <NA> |  88 |  9.69 |    <NA> |   <NA>

Replacing values labels

With replace_labels(), you can replace (change) value labels of labelled values. This can also be used to change the labels of tagged missing values. Make sure to know the missing tag, which can be accessed via get_na().

str(efc$c82cop1)
#>  num [1:908] 3 3 2 4 3 2 4 3 3 3 ...
#>  - attr(*, "label")= chr "do you feel you cope well as caregiver?"
#>  - attr(*, "labels")= Named num [1:4] 1 2 3 4
#>   ..- attr(*, "names")= chr [1:4] "never" "sometimes" "often" "always"

efc$c82cop1 <- set_na(efc$c82cop1, na = c(2, 3), as.tag = TRUE)
get_na(efc$c82cop1, as.tag = TRUE)
#> sometimes     often 
#>   "NA(2)"   "NA(3)"

efc$c82cop1 <- replace_labels(efc$c82cop1, labels = c("new NA label" = tagged_na("2")))
#> tagged NA 'sometimes' was replaced with new value label.

get_na(efc$c82cop1, as.tag = TRUE)
#> new NA label        often 
#>      "NA(2)"      "NA(3)"
sjlabelled/inst/doc/quasiquotation.Rmd0000644000176200001440000001374113647275302017646 0ustar liggesusers--- title: "Using quasiquotation to add variable and value labels" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Using quasiquotation to add variable and value labels} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("rlang", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` Labelling data is typically a task for end-users and is applied in own scripts or functions rather than in packages. However, sometimes it can be useful for both end-users and package developers to have a flexible way to add variable and value labels to their data. In such cases, [quasiquotation](https://adv-r.hadley.nz/quasiquotation.html) is helpful. This vignette demonstrate how to use quasiquotation in _sjlabelled_ to label your data. ## Adding value labels to variables using quasiquotation Usually, `set_labels()` can be used to add value labels to variables. The syntax of this function is easy to use, and `set_labels()` allows to add value labels to multiple variables at once, if these variables share the same value labels. In the following examples, we will use the `frq()` function, that shows an extra **label**-column containing _value labels_, if the data is labelled. If the data has _no_ value labels, this column is not shown in the output. ```{r message=FALSE, warning=FALSE} library(sjlabelled) library(sjmisc) # for frq()-function library(rlang) # unlabelled data dummies <- data.frame( dummy1 = sample(1:3, 40, replace = TRUE), dummy2 = sample(1:3, 40, replace = TRUE), dummy3 = sample(1:3, 40, replace = TRUE) ) # set labels for all variables in the data frame test <- set_labels(dummies, labels = c("low", "mid", "hi")) attr(test$dummy1, "labels") frq(test, dummy1) # and set same value labels for two of three variables test <- set_labels( dummies, dummy1, dummy2, labels = c("low", "mid", "hi") ) frq(test) ``` `val_labels()` does the same job as `set_labels()`, but in a different way. While `set_labels()` requires variables to be specified in the `...`-argument, and labels in the `labels`-argument, `val_labels()` requires both to be specified in the `...`. `val_labels()` requires _named_ vectors as argument, with the _left-hand side_ being the name of the variable that should be labelled, and the _right-hand side_ containing the labels for the values. ```{r message=FALSE, warning=FALSE} test <- val_labels(dummies, dummy1 = c("low", "mid", "hi")) attr(test$dummy1, "labels") # remaining variables are not labelled frq(test) ``` Unlike `set_labels()`, `val_labels()` allows the user to add _different_ value labels to different variables in one function call. Another advantage, or difference, of `val_labels()` is it's flexibility in defining variable names and value labels by using quasiquotation. ### Add labels that are stored in a vector To use quasiquotation, we need the **rlang** package to be installed and loaded. Now we can have labels in a character vector, and use `!!` to unquote this vector. ```{r message=FALSE, warning=FALSE} labels <- c("low_quote", "mid_quote", "hi_quote") test <- val_labels(dummies, dummy1 = !! labels) attr(test$dummy1, "labels") ``` ### Define variable names that are stored in a vector The same can be done with the names of _variables_ that should get new value labels. We then need `!!` to unquote the variable name and `:=` as assignment. ```{r message=FALSE, warning=FALSE} variable <- "dummy2" test <- val_labels(dummies, !! variable := c("lo_var", "mid_var", "high_var")) # no value labels attr(test$dummy1, "labels") # value labels attr(test$dummy2, "labels") ``` ### Both variable names and value labels are stored in a vector Finally, we can combine the above approaches to be flexible regarding both variable names and value labels. ```{r message=FALSE, warning=FALSE} variable <- "dummy3" labels <- c("low", "mid", "hi") test <- val_labels(dummies, !! variable := !! labels) attr(test$dummy3, "labels") ``` ## Adding variable labels using quasiquotation `set_label()` is the equivalent to `set_labels()` to add variable labels to a variable. The equivalent to `val_labels()` is `var_labels()`, which works in the same way as `val_labels()`. In case of _variable_ labels, a `label`-attribute is added to a vector or factor (instead of a `labels`-attribute, which is used for _value_ labels). The following examples show how to use `var_labels()` to add variable labels to the data. We demonstrate this function without further explanation, because it is actually very similar to `val_labels()`. ```{r message=FALSE, warning=FALSE} dummy <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) # simple usage test <- var_labels(dummy, a = "first variable", c = "third variable") attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for labels v1 <- "First variable" v2 <- "Second variable" test <- var_labels(dummy, a = !! v1, b = !! v2) attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for variable names x1 <- "a" x2 <- "c" test <- var_labels(dummy, !! x1 := "First", !! x2 := "Second") attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for both variable names and labels test <- var_labels(dummy, !! x1 := !! v1, !! x2 := !! v2) attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") ``` ## Conclusion As we have demonstrated, `var_labels()` and `val_labels()` are one of the most flexible and easy-to-use ways to add value and variable labels to our data. Another advantage is the consistent design of all functions in **sjlabelled**, which allows seamless integration into pipe-workflows. sjlabelled/inst/doc/intro_sjlabelled.Rmd0000644000176200001440000003404713647275404020101 0ustar liggesusers--- title: "Labelled Data and the sjlabelled-Package" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Labelled Data and the sjlabelled-Package} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("magrittr", quietly = TRUE) || !requireNamespace("dplyr", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This package provides functions to read and write data between R and other statistical software packages like _SPSS_, _SAS_ or _Stata_ and to work with labelled data; this includes easy ways to get and set label attributes, to convert labelled vectors into factors (and vice versa), or to deal with multiple declared missing values etc. This vignette gives an overview of functions to work with labelled data. # Labelled Data _Labelled data_ (or labelled vectors) is a common data structure in other statistical environments to store meta-information about variables, like variable names, value labels or multiple defined missing values. Labelled data not only extends **R**'s capabilities to deal with proper value _and_ variable labels, but also facilitates the representation of different types of missing values, like in other statistical software packages. Typically, in R, multiple declared missings cannot be represented in a similar way, like in 'SPSS' or 'SAS', with the regular missing values. However, the **haven**-package introduced `tagged_na` values, which can do this. Tagged NA's work exactly like regular R missing values except that they store one additional byte of information: a tag, which is usually a letter ("a" to "z") or also may be a character number ("0" to "9"). This allows to indicate different missings. Functions of **sjlabelled** do not necessarily require vectors of class `labelled` or `haven_labelled`. The `labelled` class, implemented by the packages **haven** and **labelled**, may cause troubles with other packages, thus it's only intended as being an intermediate data structure that should be converted to common R classes. However, coercing a `labelled` vector to other classes (like factor or numeric) typically means that meta information like value and variable label attributes are lost. Actually, there is no need to drop these attributes for non-`labelled`-class vectors. Functions like `lm()` simply copy these attributes to the data that is included in the returned object. Packages like **sjPlot** support labelled data for easily annotated data visualization. **sjlabelled** supports working with _labelled data_ and offers functions to benefit from these features. **Note:** Since package-version 2.0 of the **haven**-package, the `labelled`-class attribute was changed to `haven_labelled`, to avoid interferences with the **Hmisc**-package. ## Labelled Data in haven and labelled The **labelled**-package is intended to support `labelled` / `haven_labelled` metadata structures, thus the data structure of labelled vectors in **haven** and **labelled** is the same. Labelled data in this format stores information about value labels, variable names and multiple defined missing values. However, _variable names_ are only part of this information if data was imported with one of **haven**'s read-functions. Adding a variable label attribute is (at least up to version 1.0.0) not possible via the `labelled()`-constructor method. ```{r} library(haven) x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) print(x) ``` A `labelled` vector can either be a numeric or character vector. Conversion to factors copies the value labels as factor levels, but drops the label attributes and missing information: ```{r} is.na(x) as_factor(x) is.na(as_factor(x)) ``` ## Labelled Data in sjlabelled **sjlabelled** supports label attributes in **haven**-style (`label` and `labels`). You're not restricted to the `labelled` class for vectors when working with **sjlabelled** and labelled data. Hence, you can have vectors of common R classes and still use information like variable or value labels. ```{r message=FALSE} library(sjlabelled) # sjlabelled-sample data, an atomic vector with label attributes data(efc) str(efc$e16sex) ``` # Value Labels ## Getting value labels The `get_labels()`-method is a generic method to return value labels of a vector or data frame. ```{r} get_labels(efc$e42dep) ``` You can prefix the value labels with the associated values or return them as named vector with the `values` argument. ```{r} get_labels(efc$e42dep, values = "p") ``` `get_labels()` also returns "labels" of factors, even if the factor has no label attributes. ```{r} x <- factor(c("low", "mid", "low", "hi", "mid", "low")) get_labels(x) ``` To ensure that labels are only returned for vectors with label-attribute, use the `attr.only` argument. ```{r} x <- factor(c("low", "mid", "low", "hi", "mid", "low")) get_labels(x, attr.only = TRUE) ``` If a vector has a label attribute, only these labels are returned. Non-labelled values are excluded from the output by default... ```{r} # get labels, including tagged NA values x <- labelled( c(1:3, tagged_na("a", "c", "z"), 4:1), c("Agreement" = 1, "Disagreement" = 4, "First" = tagged_na("c"), "Refused" = tagged_na("a"), "Not home" = tagged_na("z")) ) get_labels(x) ``` ... however, you can add non-labelled values to the return value as well, using the `non.labelled` argument. ```{r} get_labels(x, non.labelled = TRUE) ``` Tagged missing values can also be included in the output, using the `drop.na` argument. ```{r} get_labels(x, values = "n", drop.na = FALSE) ``` ## Getting labelled values The `get_values()` method returns the values for labelled values (i.e. values that have an associated label). We still use the vector `x` from the above examples. ```{r} print(x) get_values(x) ``` With the `drop.na` argument you can omit those values from the return values that are defined as missing. ```{r} get_values(x, drop.na = TRUE) ``` ## Setting value labels With `set_labels()` you can add label attributes to any vector. ```{r} x <- sample(1:4, 20, replace = TRUE) # return new labelled vector x <- set_labels(x, labels = c("very low", "low", "mid", "hi")) x ``` If more labels than values are given, only as many labels elements are used as values are present. ```{r} x <- c(2, 2, 3, 3, 2) x <- set_labels(x, labels = c("a", "b", "c")) x ``` However, you can force to use all labels, even for values that are not in the vector, using the `force.labels` argument. ```{r} x <- c(2, 2, 3, 3, 2) x <- set_labels( x, labels = c("a", "b", "c"), force.labels = TRUE ) x ``` For vectors with more unique values than labels, additional labels for non-labelled values are added. ```{r} x <- c(1, 2, 3, 2, 4, NA) x <- set_labels(x, labels = c("yes", "maybe", "no")) x ``` Use `force.values` to add only those labels that have been passed as argument. ```{r} x <- c(1, 2, 3, 2, 4, NA) x <- set_labels( x, labels = c("yes", "maybe", "no"), force.values = FALSE ) x ``` To add explicit labels for values (without adding more labels than wanted and without dropping labels for values that do not appear in the vector), use a named vector of labels as argument. The arguments `force.values` and `force.labels` are ignored when using named vectors. ```{r} x <- c(1, 2, 3, 2, 4, 5) x <- set_labels( x, labels = c("strongly agree" = 1, "totally disagree" = 4, "refused" = 5, "missing" = 9) ) x ``` If you want to set different value labels for a complete data frame, if you provide the labels as a `list`. For each variable in the data frame, provide a list element with value labels as character vector. Note that the length of the list must be equal to the number of variables (columns) in the data frame. ```{r} tmp <- data.frame( a = c(1, 2, 3), b = c(1, 2, 3), c = c(1, 2, 3) ) labels <- list( c("one", "two", "three"), c("eins", "zwei", "drei"), c("un", "dos", "tres") ) tmp <- set_labels(tmp, labels = labels) str(tmp) ``` You can use `set_labels()` within a pipe-workflow with _dplyr_. ```{r message=FALSE} library(dplyr) library(sjmisc) # for frq() data(efc) efc %>% select(c82cop1, c83cop2, c84cop3) %>% set_labels(labels = c("not often" = 1, "very often" = 4)) %>% frq() ``` # Variable Labels ## Getting variable labels The `get_label()`-method returns the variable label of a vector or all variable labels from a data frame. ```{r} get_label(efc$e42dep) get_label(efc, e42dep, e16sex, e15relat) ``` If a vector has no variable label, `NULL` is returned. However, `get_label()` also allows returning a standard value instead of `NULL`, in case the vector has no label attribute. This is useful to combine with `deparse(substitute())` in function calls, so - for instance - the name of the vector can be used as default value if no variable labels are present. ```{r} dummy <- c(1, 2, 3) testit <- function(x) get_label(x, def.value = deparse(substitute(x))) # returns name of vector, if it has no variable label testit(dummy) ``` If you want human-readable labels, you can use the `case`-argument, which will pass the labels to a string parser in the [snakecase-package](https://cran.r-project.org/package=snakecase). ```{r} data(iris) # returns no labels, because iris-data is not labelled get_label(iris) # returns the column name as default labels, if data is not labelled get_label(iris, def.value = colnames(iris)) # labels are parsed in a readable way get_label(iris, def.value = colnames(iris), case = "parsed") ``` ## Setting variable labels The `set_label()` function adds the variable label attribute to a vector. You can either return a new vector, or label an existing vector ```{r} x <- sample(1:4, 10, replace = TRUE) # return new vector x <- set_label(x, label = "Dummy-variable") str(x) # label existing vector set_label(x) <- "Another Dummy-variable" str(x) ``` `set_label()` can also set variable labels for a data frame. In this case, the variable attributes get an additional `name` attribute with the vector's name. This makes it easier to see which label belongs to which vector. ```{r} x <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) x <- set_label(x, label = c("Variable A", "Variable B", "Variable C")) str(x) get_label(x) ``` An alternative to `set_label()` is `var_labels()`, which also works within pipe-workflows. `var_labels()` requires named vectors as arguments to match the column names of the input, and set the associated variable labels. ```{r} x <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) library(magrittr) # for pipe x %>% var_labels( a = "Variable A", b = "Variable B", c = "Variable C" ) %>% str() ``` # Missing Values ## Defining missing values `set_na()` converts values of a vector or of multiple vectors in a data frame into `NA`s. With `as.tag = TRUE`, `set_na()` creates tagged `NA` values, which means that these missing values get an information tag and a value label (which is, by default, the former value that was converted to NA). You can either return a new vector/data frame, or set `NA`s into an existing vector/data frame. ```{r} x <- sample(1:8, 100, replace = TRUE) # show value distribution table(x) # set value 1 and 8 as tagged missings x <- set_na(x, na = c(1, 8), as.tag = TRUE) x # show value distribution, including missings table(x, useNA = "always") # now let's see, which NA's were "1" and which were "8" print_tagged_na(x) x <- factor(c("a", "b", "c")) x # set NA into existing vector x <- set_na(x, na = "b", as.tag = TRUE) x ``` ## Getting missing values The `get_na()` function returns all tagged NA values. We still use the vector `x` from the previous example. ```{r} get_na(x) ``` To see the tags of the NA values, use the `as.tag` argument. ```{r} get_na(x, as.tag = TRUE) ``` ## Replacing specific NA with values While `set_na()` allows you to replace values with (tagged) NA's, `replace_na()` (from package **sjmisc**) allows you to replace either all NA values of a vector or specific tagged NA values with a non-NA value. ```{r} library(sjmisc) # for replace_na() data(efc) str(efc$c84cop3) efc$c84cop3 <- set_na(efc$c84cop3, na = c(2, 3), as.tag = TRUE) get_na(efc$c84cop3, as.tag = TRUE) # this would replace all NA's into "2" dummy <- replace_na(efc$c84cop3, value = 2) # labels of former tagged NA's are preserved get_labels(dummy, drop.na = FALSE, values = "p") get_na(dummy, as.tag = TRUE) # No more NA values frq(dummy) # In this example, the tagged NA(2) is replaced with value 2 # the new value label for value 2 is "restored NA" dummy <- replace_na(efc$c84cop3, value = 2, na.label = "restored NA", tagged.na = "2") # Only one tagged NA remains get_labels(dummy, drop.na = FALSE, values = "p") get_na(dummy, as.tag = TRUE) # Some NA values remain frq(dummy) ``` ## Replacing values labels With `replace_labels()`, you can replace (change) value labels of labelled values. This can also be used to change the labels of tagged missing values. Make sure to know the missing tag, which can be accessed via `get_na()`. ```{r} str(efc$c82cop1) efc$c82cop1 <- set_na(efc$c82cop1, na = c(2, 3), as.tag = TRUE) get_na(efc$c82cop1, as.tag = TRUE) efc$c82cop1 <- replace_labels(efc$c82cop1, labels = c("new NA label" = tagged_na("2"))) get_na(efc$c82cop1, as.tag = TRUE) ``` sjlabelled/inst/doc/quasiquotation.R0000644000176200001440000000546114046506732017323 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("rlang", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ## ----message=FALSE, warning=FALSE--------------------------------------------- library(sjlabelled) library(sjmisc) # for frq()-function library(rlang) # unlabelled data dummies <- data.frame( dummy1 = sample(1:3, 40, replace = TRUE), dummy2 = sample(1:3, 40, replace = TRUE), dummy3 = sample(1:3, 40, replace = TRUE) ) # set labels for all variables in the data frame test <- set_labels(dummies, labels = c("low", "mid", "hi")) attr(test$dummy1, "labels") frq(test, dummy1) # and set same value labels for two of three variables test <- set_labels( dummies, dummy1, dummy2, labels = c("low", "mid", "hi") ) frq(test) ## ----message=FALSE, warning=FALSE--------------------------------------------- test <- val_labels(dummies, dummy1 = c("low", "mid", "hi")) attr(test$dummy1, "labels") # remaining variables are not labelled frq(test) ## ----message=FALSE, warning=FALSE--------------------------------------------- labels <- c("low_quote", "mid_quote", "hi_quote") test <- val_labels(dummies, dummy1 = !! labels) attr(test$dummy1, "labels") ## ----message=FALSE, warning=FALSE--------------------------------------------- variable <- "dummy2" test <- val_labels(dummies, !! variable := c("lo_var", "mid_var", "high_var")) # no value labels attr(test$dummy1, "labels") # value labels attr(test$dummy2, "labels") ## ----message=FALSE, warning=FALSE--------------------------------------------- variable <- "dummy3" labels <- c("low", "mid", "hi") test <- val_labels(dummies, !! variable := !! labels) attr(test$dummy3, "labels") ## ----message=FALSE, warning=FALSE--------------------------------------------- dummy <- data.frame( a = sample(1:4, 10, replace = TRUE), b = sample(1:4, 10, replace = TRUE), c = sample(1:4, 10, replace = TRUE) ) # simple usage test <- var_labels(dummy, a = "first variable", c = "third variable") attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for labels v1 <- "First variable" v2 <- "Second variable" test <- var_labels(dummy, a = !! v1, b = !! v2) attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for variable names x1 <- "a" x2 <- "c" test <- var_labels(dummy, !! x1 := "First", !! x2 := "Second") attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") # quasiquotation for both variable names and labels test <- var_labels(dummy, !! x1 := !! v1, !! x2 := !! v2) attr(test$a, "label") attr(test$b, "label") attr(test$c, "label") sjlabelled/inst/CITATION0000644000176200001440000000055113446531213014470 0ustar liggesusersyear <- sub("-.*", "", meta$Date) title <- sprintf("sjlabelled: Labelled Data Utility Functions (Version %s)", meta$Version) bibentry(bibtype="manual", title = title, author = person("Daniel", "Lüdecke"), year = year, url = "https://CRAN.R-project.org/package=sjlabelled", doi = "10.5281/zenodo.1249215")