sjPlot/0000755000176200001440000000000014150203412011515 5ustar liggesuserssjPlot/NAMESPACE0000644000176200001440000001562614150131312012745 0ustar liggesusers# Generated by roxygen2: do not edit by hand S3method(knit_print,sjTable) S3method(knit_print,sjt_descr) S3method(knit_print,sjt_frq) S3method(knit_print,sjt_grpdescr) S3method(knit_print,sjt_grpmean) S3method(knit_print,sjt_grpmeans) S3method(knit_print,sjt_mwu) S3method(knit_print,sjt_reliab) S3method(print,sjTable) S3method(print,sjt_descr) S3method(print,sjt_equi_test) S3method(print,sjt_frq) S3method(print,sjt_grpdescr) S3method(print,sjt_grpmean) S3method(print,sjt_grpmeans) S3method(print,sjt_mwu) S3method(print,sjt_reliab) export(css_theme) export(dist_chisq) export(dist_f) export(dist_norm) export(dist_t) export(font_size) export(get_model_data) export(label_angle) export(legend_style) export(plot_frq) export(plot_gpt) export(plot_grid) export(plot_grpfrq) export(plot_kfold_cv) export(plot_likert) export(plot_model) export(plot_models) export(plot_residuals) export(plot_scatter) export(plot_stackfrq) export(plot_xtab) export(save_plot) export(scale_color_sjplot) export(scale_fill_sjplot) export(set_theme) export(show_sjplot_pals) export(sjp.aov1) export(sjp.chi2) export(sjp.corr) export(sjp.poly) export(sjplot) export(sjplot_pal) export(sjt.itemanalysis) export(sjt.xtab) export(sjtab) export(tab_corr) export(tab_df) export(tab_dfs) export(tab_fa) export(tab_itemscale) export(tab_model) export(tab_pca) export(tab_stackfrq) export(tab_xtab) export(theme_538) export(theme_blank) export(theme_sjplot) export(theme_sjplot2) export(view_df) import(ggplot2) importFrom(MASS,glm.nb) importFrom(bayestestR,ci) importFrom(datawizard,data_partition) importFrom(datawizard,skewness) importFrom(dplyr,"%>%") importFrom(dplyr,arrange) importFrom(dplyr,between) importFrom(dplyr,bind_rows) importFrom(dplyr,case_when) importFrom(dplyr,filter) importFrom(dplyr,full_join) importFrom(dplyr,group_by) importFrom(dplyr,group_modify) importFrom(dplyr,group_vars) importFrom(dplyr,if_else) importFrom(dplyr,inner_join) importFrom(dplyr,mutate) importFrom(dplyr,n) importFrom(dplyr,n_distinct) importFrom(dplyr,quos) importFrom(dplyr,select) importFrom(dplyr,slice) importFrom(dplyr,summarise) importFrom(dplyr,summarize) importFrom(dplyr,ungroup) importFrom(effectsize,standardize) importFrom(ggeffects,ggeffect) importFrom(ggeffects,ggpredict) importFrom(grDevices,axisTicks) importFrom(grDevices,cm) importFrom(grDevices,colorRampPalette) importFrom(grDevices,dev.off) importFrom(grDevices,jpeg) importFrom(grDevices,png) importFrom(grDevices,rgb) importFrom(grDevices,svg) importFrom(grDevices,tiff) importFrom(graphics,plot) importFrom(insight,find_interactions) importFrom(insight,find_predictors) importFrom(insight,find_random) importFrom(insight,find_response) importFrom(insight,get_data) importFrom(insight,get_response) importFrom(insight,has_intercept) importFrom(insight,is_multivariate) importFrom(insight,model_info) importFrom(insight,n_obs) importFrom(insight,standardize_names) importFrom(knitr,asis_output) importFrom(knitr,knit_print) importFrom(parameters,closest_component) importFrom(parameters,kurtosis) importFrom(parameters,model_parameters) importFrom(parameters,principal_components) importFrom(performance,cronbachs_alpha) importFrom(performance,item_intercor) importFrom(performance,item_reliability) importFrom(performance,performance_aic) importFrom(performance,performance_aicc) importFrom(performance,r2) 
importFrom(performance,variance_decomposition) importFrom(purrr,as_vector) importFrom(purrr,compact) importFrom(purrr,flatten_chr) importFrom(purrr,flatten_dbl) importFrom(purrr,map) importFrom(purrr,map2) importFrom(purrr,map_chr) importFrom(purrr,map_dbl) importFrom(purrr,map_df) importFrom(purrr,map_if) importFrom(purrr,map_lgl) importFrom(purrr,pmap) importFrom(purrr,reduce) importFrom(rlang,.data) importFrom(rlang,enquo) importFrom(rlang,quo_name) importFrom(scales,brewer_pal) importFrom(scales,grey_pal) importFrom(scales,percent) importFrom(sjlabelled,as_numeric) importFrom(sjlabelled,copy_labels) importFrom(sjlabelled,drop_labels) importFrom(sjlabelled,get_label) importFrom(sjlabelled,get_labels) importFrom(sjlabelled,get_values) importFrom(sjlabelled,response_labels) importFrom(sjlabelled,set_label) importFrom(sjlabelled,set_labels) importFrom(sjlabelled,term_labels) importFrom(sjmisc,add_case) importFrom(sjmisc,add_columns) importFrom(sjmisc,add_variables) importFrom(sjmisc,descr) importFrom(sjmisc,frq) importFrom(sjmisc,group_labels) importFrom(sjmisc,group_var) importFrom(sjmisc,is_empty) importFrom(sjmisc,is_even) importFrom(sjmisc,is_float) importFrom(sjmisc,is_num_fac) importFrom(sjmisc,is_odd) importFrom(sjmisc,rec) importFrom(sjmisc,remove_var) importFrom(sjmisc,replace_na) importFrom(sjmisc,set_na) importFrom(sjmisc,std) importFrom(sjmisc,str_contains) importFrom(sjmisc,str_start) importFrom(sjmisc,to_factor) importFrom(sjmisc,to_value) importFrom(sjmisc,trim) importFrom(sjmisc,typical_value) importFrom(sjmisc,var_rename) importFrom(sjmisc,var_type) importFrom(sjmisc,word_wrap) importFrom(sjstats,cramer) importFrom(sjstats,crosstable_statistics) importFrom(sjstats,mean_n) importFrom(sjstats,phi) importFrom(sjstats,table_values) importFrom(sjstats,weight2) importFrom(sjstats,weighted_sd) importFrom(stats,aov) importFrom(stats,as.formula) importFrom(stats,binomial) importFrom(stats,chisq.test) importFrom(stats,coef) importFrom(stats,complete.cases) importFrom(stats,confint) importFrom(stats,cor) importFrom(stats,cor.test) importFrom(stats,dchisq) importFrom(stats,deviance) importFrom(stats,df) importFrom(stats,dnorm) importFrom(stats,dt) importFrom(stats,family) importFrom(stats,fisher.test) importFrom(stats,fitted) importFrom(stats,formula) importFrom(stats,ftable) importFrom(stats,glm) importFrom(stats,kruskal.test) importFrom(stats,lm) importFrom(stats,loess) importFrom(stats,logLik) importFrom(stats,mad) importFrom(stats,na.omit) importFrom(stats,na.pass) importFrom(stats,nobs) importFrom(stats,pchisq) importFrom(stats,pf) importFrom(stats,pnorm) importFrom(stats,poisson) importFrom(stats,poly) importFrom(stats,ppoints) importFrom(stats,prcomp) importFrom(stats,predict) importFrom(stats,pt) importFrom(stats,qchisq) importFrom(stats,qf) importFrom(stats,qnorm) importFrom(stats,qt) importFrom(stats,quantile) importFrom(stats,reshape) importFrom(stats,residuals) importFrom(stats,rstudent) importFrom(stats,sd) importFrom(stats,setNames) importFrom(stats,shapiro.test) importFrom(stats,summary.lm) importFrom(stats,terms) importFrom(stats,update) importFrom(stats,varimax) importFrom(stats,weighted.mean) importFrom(stats,wilcox.test) importFrom(stats,xtabs) importFrom(tidyr,gather) importFrom(tidyr,nest) importFrom(tidyr,spread) importFrom(tidyr,unnest) importFrom(utils,browseURL) importFrom(utils,packageVersion) importFrom(utils,setTxtProgressBar) importFrom(utils,txtProgressBar) sjPlot/README.md0000644000176200001440000000401614136206624013011 0ustar liggesusers# sjPlot - Data 
Visualization for Statistics in Social Science [![CRAN_Status_Badge](http://www.r-pkg.org/badges/version/sjPlot)](https://cran.r-project.org/package=sjPlot) [![Documentation](https://img.shields.io/badge/documentation-sjPlot-orange.svg?colorB=E91E63)](https://strengejacke.github.io/sjPlot/) [![downloads](https://cranlogs.r-pkg.org/badges/sjPlot)](https://cranlogs.r-pkg.org/) [![total](https://cranlogs.r-pkg.org/badges/grand-total/sjPlot)](https://cranlogs.r-pkg.org/) Collection of plotting and table output functions for data visualization. Results of various statistical analyses (that are commonly used in social sciences) can be visualized using this package, including simple and cross-tabulated frequencies, histograms, box plots, (generalized) linear models, mixed effects models, PCA and correlation matrices, cluster analyses, scatter plots, Likert scales, effects plots of interaction terms in regression models, constructing index or score variables and much more. ## Installation ### Latest development build To install the latest development snapshot (see latest changes below), type the following commands into the R console: ```r library(devtools) devtools::install_github("strengejacke/sjPlot") ``` ### Official, stable release To install the latest stable release from CRAN, type the following command into the R console: ```r install.packages("sjPlot") ``` ## Documentation and examples Please visit [https://strengejacke.github.io/sjPlot/](https://strengejacke.github.io/sjPlot/) for documentation and vignettes. ## Citation In case you want / have to cite my package, please use `citation('sjPlot')` for citation information. Since the core functionality of this package depends on the [ggplot2 package](https://cran.r-project.org/package=ggplot2), consider citing that package as well. [![DOI](https://zenodo.org/badge/DOI/10.5281/zenodo.1308157.svg)](https://doi.org/10.5281/zenodo.1308157) sjPlot/data/0000755000176200001440000000000013446531454012447 5ustar liggesuserssjPlot/data/efc.RData0000644000176200001440000004213413446531454014125 0ustar liggesusers
sjPlot/man/0000755000176200001440000000000014136206671012306 5ustar liggesuserssjPlot/man/sjPlot-themes.Rd0000644000176200001440000001012713567425621015341 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjplot_themes.R \name{sjPlot-themes} \alias{sjPlot-themes} \alias{theme_sjplot} \alias{theme_sjplot2} \alias{theme_blank} \alias{theme_538} \alias{font_size} \alias{label_angle} \alias{legend_style} \alias{scale_color_sjplot} \alias{scale_fill_sjplot} \alias{sjplot_pal} \alias{show_sjplot_pals} \alias{css_theme} \title{Modify plot appearance} \usage{ theme_sjplot(base_size = 12, base_family = "") theme_sjplot2(base_size = 12, base_family = "") theme_blank(base_size = 12, base_family = "") theme_538(base_size = 12, base_family = "") font_size( title, axis_title.x, axis_title.y, labels.x, labels.y, offset.x, offset.y, base.theme ) label_angle(angle.x, angle.y, base.theme) legend_style(inside, pos, justify, base.theme) scale_color_sjplot(palette = "metro", discrete = TRUE, reverse = FALSE, ...) scale_fill_sjplot(palette = "metro", discrete = TRUE, reverse = FALSE, ...) sjplot_pal(palette = "metro", n = NULL) show_sjplot_pals() css_theme(css.theme = "regression") } \arguments{ \item{base_size}{Base font size.} \item{base_family}{Base font family.} \item{title}{Font size for plot titles.} \item{axis_title.x}{Font size for x-axis titles.} \item{axis_title.y}{Font size for y-axis titles.} \item{labels.x}{Font size for x-axis labels.} \item{labels.y}{Font size for y-axis labels.} \item{offset.x}{Offset for x-axis titles.} \item{offset.y}{Offset for y-axis titles.} \item{base.theme}{Optional ggplot-theme-object, which is needed in case multiple functions should be combined, e.g. \code{theme_sjplot() + label_angle()}.
In such cases, use \code{label_angle(base.theme = theme_sjplot())}.} \item{angle.x}{Angle for x-axis labels.} \item{angle.y}{Angle for y-axis labels.} \item{inside}{Logical, use \code{TRUE} to put legend inside the plotting area. See also \code{pos}.} \item{pos}{Position of the legend, if a legend is drawn. \describe{ \item{\emph{Legend outside plot}}{ Use \code{"bottom"}, \code{"top"}, \code{"left"} or \code{"right"} to position the legend above, below, on the left or right side of the diagram. } \item{\emph{Legend inside plot}}{ If \code{inside = TRUE}, legend can be placed inside plot. Use \code{"top left"}, \code{"top right"}, \code{"bottom left"} and \code{"bottom right"} to position legend in any of these corners, or a two-element numeric vector with values from 0-1. See also \code{inside}. } }} \item{justify}{Justification of legend, relative to its position (\code{"center"} or a two-element numeric vector with values from 0-1).} \item{palette}{Character name of color palette.} \item{discrete}{Logical, if \code{TRUE}, a discrete colour palette is returned. Else, a gradient palette is returned, where colours of the requested palette are interpolated using \code{\link[grDevices]{colorRampPalette}}.} \item{reverse}{Logical, if \code{TRUE}, order of returned colours is reversed.} \item{...}{Further arguments passed down to ggplot's \code{scale()}-functions.} \item{n}{Numeric, number of colors to be returned. By default, the complete colour palette is returned.} \item{css.theme}{Name of the CSS pre-set theme-style. Can be used for table-functions.} } \description{ Set default plot themes, use pre-defined color scales or modify plot or table appearance. } \details{ When using the \code{colors} argument in function calls (e.g. \code{plot_model()}) or when calling one of the predefined scale-functions (e.g. \code{scale_color_sjplot()}), there are pre-defined colour palettes in this package. Use \code{show_sjplot_pals()} to show all available colour palettes. } \examples{ # prepare data library(sjmisc) data(efc) efc <- to_factor(efc, c161sex, e42dep, c172code) m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc) # create plot-object p <- plot_model(m) # change theme p + theme_sjplot() # change font-size p + font_size(axis_title.x = 30) # apply color theme p + scale_color_sjplot() # show all available colour palettes show_sjplot_pals() # get colour values from specific palette sjplot_pal(pal = "breakfast club") } sjPlot/man/sjp.poly.Rd0000644000176200001440000001301313567425621014356 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotPolynomials.R \name{sjp.poly} \alias{sjp.poly} \title{Plot polynomials for (generalized) linear regression} \usage{ sjp.poly( x, poly.term, poly.degree, poly.scale = FALSE, fun = NULL, axis.title = NULL, geom.colors = NULL, geom.size = 0.8, show.loess = TRUE, show.loess.ci = TRUE, show.p = TRUE, show.scatter = TRUE, point.alpha = 0.2, point.color = "#404040", loess.color = "#808080" ) } \arguments{ \item{x}{A vector, representing the response variable of a linear (mixed) model; or a linear (mixed) model as returned by \code{\link{lm}} or \code{\link[lme4]{lmer}}.} \item{poly.term}{If \code{x} is a vector, \code{poly.term} should also be a vector, representing the polynomial term (independent variable) in the model; if \code{x} is a fitted model, \code{poly.term} should be the polynomial term's name as a character string.
See 'Examples'.} \item{poly.degree}{Numeric, or numeric vector, indicating the degree of the polynomial. If \code{poly.degree} is a numeric vector, multiple polynomial curves for each degree are plotted. See 'Examples'.} \item{poly.scale}{Logical, if \code{TRUE}, \code{poly.term} will be scaled before linear regression is computed. Default is \code{FALSE}. Scaling the polynomial term may have an impact on the resulting p-values.} \item{fun}{Linear function when modelling polynomial terms. Use \code{fun = "lm"} for linear models, or \code{fun = "glm"} for generalized linear models. When \code{x} is not a vector, but a fitted model object, the function is detected automatically. If \code{x} is a vector, \code{fun} defaults to \code{"lm"}.} \item{axis.title}{Character vector of length one or two (depending on the plot function and type), used as title(s) for the x and y axis. If not specified, a default labelling is chosen. \strong{Note:} Some plot types may not support this argument sufficiently. In such cases, use the returned ggplot-object and add axis titles manually with \code{\link[ggplot2]{labs}}. Use \code{axis.title = ""} to remove axis titles.} \item{geom.colors}{user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{show.loess}{Logical, if \code{TRUE}, an additional loess-smoothed line is plotted.} \item{show.loess.ci}{Logical, if \code{TRUE}, a confidence region for the loess-smoothed line will be plotted.} \item{show.p}{Logical, if \code{TRUE} (default), p-values for polynomial terms are printed to the console.} \item{show.scatter}{Logical, if \code{TRUE} (default), adds a scatter plot of data points to the plot.} \item{point.alpha}{Alpha value of point-geoms in the scatter plots. Only applies, if \code{show.scatter = TRUE}.} \item{point.color}{Color of point-geoms in the scatter plots. Only applies, if \code{show.scatter = TRUE}.} \item{loess.color}{Color of the loess-smoothed line. Only applies, if \code{show.loess = TRUE}.} } \value{ A ggplot-object. } \description{ This function plots a scatter plot of a term \code{poly.term} against a response variable \code{x} and adds - depending on the number of values in \code{poly.degree} - multiple polynomial curves. A loess-smoothed line can be added to see which of the polynomial curves fits best to the data. } \details{ For each polynomial degree, a simple linear regression on \code{x} (resp. the extracted response, if \code{x} is a fitted model) is performed, where only the polynomial term \code{poly.term} is included as the independent variable. Thus, \code{lm(y ~ x + I(x^2) + ... + I(x^i))} is repeatedly computed for all values in \code{poly.degree}, and the predicted values of the response are plotted against the raw values of \code{poly.term}. If \code{x} is a fitted model, other covariates are ignored when finding the best fitting polynomial. \cr \cr This function evaluates raw polynomials, \emph{not orthogonal} polynomials. Polynomials are computed using the \code{\link{poly}} function, with argument \code{raw = TRUE}. \cr \cr To find out which polynomial degree fits best to the data, a loess-smoothed line (in dark grey) can be added (with \code{show.loess = TRUE}). The polynomial curve that comes closest to the loess-smoothed line should be the best fit to the data.
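For instance, the repeated fits roughly correspond to the following sketch (an illustration of the idea rather than the package's exact internal code; the helper name \code{fit_raw_poly} is made up for this example):
\preformatted{
# fit one linear model per polynomial degree, using raw polynomials
fit_raw_poly <- function(y, x, degree) {
  stats::lm(y ~ stats::poly(x, degree, raw = TRUE))
}
# e.g. the fits underlying sjp.poly(efc$c160age, efc$quol_5, 1:4)
fits <- lapply(1:4, function(d) fit_raw_poly(efc$c160age, efc$quol_5, d))
}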
} \examples{ library(sjmisc) data(efc) # linear fit. loess-smoothed line indicates a more # or less cubic curve sjp.poly(efc$c160age, efc$quol_5, 1) # quadratic fit sjp.poly(efc$c160age, efc$quol_5, 2) # linear to cubic fit sjp.poly(efc$c160age, efc$quol_5, 1:4, show.scatter = FALSE) # fit sample model fit <- lm(tot_sc_e ~ c12hour + e17age + e42dep, data = efc) # inspect relationship between predictors and response plot_model(fit, type = "slope") # "e17age" does not seem to be linearly correlated with the response # try to find an appropriate polynomial. Grey line (loess smoothed) # indicates best fit. Looks like x^4 has the best fit, # however, only x^3 has significant p-values. sjp.poly(fit, "e17age", 2:4, show.scatter = FALSE) \dontrun{ # fit new model fit <- lm(tot_sc_e ~ c12hour + e42dep + e17age + I(e17age^2) + I(e17age^3), data = efc) # plot marginal effects of polynomial term plot_model(fit, type = "pred", terms = "e17age")} } sjPlot/man/plot_likert.Rd0000644000176200001440000002613513662304072015131 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_likert.R \name{plot_likert} \alias{plot_likert} \title{Plot Likert scales as centered stacked bars} \usage{ plot_likert( items, groups = NULL, groups.titles = "auto", title = NULL, legend.title = NULL, legend.labels = NULL, axis.titles = NULL, axis.labels = NULL, catcount = NULL, cat.neutral = NULL, sort.frq = NULL, weight.by = NULL, title.wtd.suffix = NULL, wrap.title = 50, wrap.labels = 30, wrap.legend.title = 30, wrap.legend.labels = 28, geom.size = 0.6, geom.colors = "BrBG", cat.neutral.color = "grey70", intercept.line.color = "grey50", reverse.colors = FALSE, values = "show", show.n = TRUE, show.legend = TRUE, show.prc.sign = FALSE, grid.range = 1, grid.breaks = 0.2, expand.grid = TRUE, digits = 1, reverse.scale = FALSE, coord.flip = TRUE, sort.groups = TRUE, legend.pos = "bottom", rel_heights = 1, group.legend.options = list(nrow = NULL, byrow = TRUE), cowplot.options = list(label_x = 0.01, hjust = 0, align = "v") ) } \arguments{ \item{items}{Data frame, or a grouped data frame, with each column representing one item.} \item{groups}{(optional) Must be a vector of same length as \code{ncol(items)}, where each item in this vector represents the group number of the related columns of \code{items}. See 'Examples'.} \item{groups.titles}{(optional, only used if groups are supplied) Titles for each factor group that will be used as table caption for each component-table. Must be a character vector of same length as \code{length(unique(groups))}. Default is \code{"auto"}, which means that each table has a standard caption \emph{Component x}. Use \code{NULL} to use names as supplied to \code{groups} and use \code{FALSE} to suppress table captions.} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{legend.title}{character vector, used as title for the plot legend.} \item{legend.labels}{character vector with labels for the guide/legend.} \item{axis.titles}{character vector of length one or two, defining the title(s) for the x-axis and y-axis.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{catcount}{optional, amount of categories of \code{items} (e.g.
\emph{"strongly disagree", "disagree", "agree"} and \emph{"strongly agree"} would be \code{catcount = 4}). Note that this argument only applies to "valid" answers, i.e. if you have an additional neutral category (see \code{cat.neutral}) like \emph{"don't know"}, this won't count for \code{catcount} (e.g. "strongly disagree", "disagree", "agree", "strongly agree" and neutral category "don't know" would still mean that \code{catcount = 4}). See 'Note'.} \item{cat.neutral}{If there's a neutral category (like "don't know" etc.), specify the index number (value) for this category. Else, set \code{cat.neutral = NULL} (default). The proportions of neutral category answers are plotted as grey bars on the left side of the figure.} \item{sort.frq}{Indicates whether the items of \code{items} should be ordered by total sum of positive or negative answers. \describe{ \item{\code{"pos.asc"}}{to order ascending by sum of positive answers} \item{\code{"pos.desc"}}{to order descending by sum of positive answers} \item{\code{"neg.asc"}}{for sorting ascending negative answers} \item{\code{"neg.desc"}}{for sorting descending negative answers} \item{\code{NULL}}{(default) for no sorting} }} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of same length as the input vector. Default is \code{NULL}, so no weights are used.} \item{title.wtd.suffix}{Suffix (as string) for the title, if \code{weight.by} is specified, e.g. \code{title.wtd.suffix=" (weighted)"}. Default is \code{NULL}, so title will not have a suffix when cases are weighted.} \item{wrap.title}{numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{wrap.legend.labels}{numeric, determines how many chars of the legend labels are displayed in one line and when a line break is inserted.} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{geom.colors}{user defined color for geoms. 
See 'Details' in \code{\link{plot_grpfrq}}.} \item{cat.neutral.color}{Color of the neutral category, if plotted (see \code{cat.neutral}).} \item{intercept.line.color}{Color of the vertical intercept line that divides positive and negative values.} \item{reverse.colors}{logical, if \code{TRUE}, the color scale from \code{geom.colors} will be reversed, so positive and negative values switch colors.} \item{values}{Determines style and position of percentage value labels on the bars: \describe{ \item{\code{"show"}}{(default) shows percentage value labels in the middle of each category bar} \item{\code{"hide"}}{hides the value labels, so no percentage values on the bars are printed} \item{\code{"sum.inside"}}{shows the sums of percentage values for both negative and positive values and prints them inside the end of each bar} \item{\code{"sum.outside"}}{shows the sums of percentage values for both negative and positive values and prints them outside the end of each bar} }} \item{show.n}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{show.prc.sign}{logical, if \code{TRUE}, \%-signs for value labels are shown.} \item{grid.range}{Numeric, limits of the x-axis-range, as proportion of 100. Default is 1, so the x-scale ranges from zero to 100\% on both sides from the center. Can alternatively be supplied as a vector of 2 positive numbers (e.g. \code{grid.range = c(1, .8)}) to set the left and right limit separately. You can use values beyond 1 (100\%) in case bar labels are not printed because they exceed the axis range. E.g. \code{grid.range = 1.4} will set the axis from -140 to +140\%, however, only (valid) axis labels from -100 to +100\% are printed. Neutral categories are adjusted to the left-most limit.} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid is being printed.} \item{expand.grid}{logical, if \code{TRUE}, the plot grid is expanded, i.e. there is a small margin between axes and plotting region. Default is \code{FALSE}.} \item{digits}{Numeric, amount of digits after decimal point when rounding estimates or values.} \item{reverse.scale}{logical, if \code{TRUE}, the ordering of the categories is reversed, so positive and negative values switch position.} \item{coord.flip}{logical, if \code{TRUE}, the x and y axis are swapped.} \item{sort.groups}{(optional, only used if groups are supplied) logical, if groups should be sorted according to the values supplied to \code{groups}. Defaults to \code{TRUE}.} \item{legend.pos}{(optional, only used if groups are supplied) Defines the legend position. Possible values are \code{c("bottom", "top", "both", "all", "none")}. If there is only one group, or this option is set to \code{"all"}, legends will be printed as defined with \code{\link{set_theme}}.} \item{rel_heights}{(optional, only used if groups are supplied) This option can be used to adjust the height of the subplots. The bars in subplots can have different heights due to a differing number of items or due to legend placement. This can be adjusted here. Takes a vector of numbers, one for each plot. Values are evaluated relative to each other.} \item{group.legend.options}{(optional, only used if groups are supplied) List of options to be passed to \code{\link[ggplot2]{guide_legend}}.
The most notable option is \code{byrow = TRUE} (the default), which orders the categories row-wise; with \code{group.legend.options = list(nrow = 1)}, all categories can be forced onto a single row.} \item{cowplot.options}{(optional, only used if groups are supplied) List of label options to be passed to \code{\link[cowplot]{plot_grid}}.} } \value{ A ggplot-object. } \description{ Plot Likert scales as centered stacked bars. } \note{ Note that only even numbers of categories are possible to plot, so the "positive" and "negative" values can be split into two halves. A neutral category (like "don't know") can be used, but must be indicated by \code{cat.neutral}. \cr \cr The \code{catcount}-argument indicates how many item categories are in the Likert scale. Normally, this argument can be ignored because the amount of valid categories is retrieved automatically. However, sometimes (for instance, if a certain category is missing in all items), auto-detection of the amount of categories fails. In such cases, specify the amount of categories with the \code{catcount}-argument. } \examples{ library(sjmisc) data(efc) # find all variables from COPE-Index, which all have a "cop" in their # variable name, and then plot that subset as likert-plot mydf <- find_var(efc, pattern = "cop", out = "df") plot_likert(mydf) plot_likert( mydf, grid.range = c(1.2, 1.4), expand.grid = FALSE, values = "sum.outside", show.prc.sign = TRUE ) # Plot in groups plot_likert(mydf, c(2,1,1,1,1,2,2,2,1)) if (require("parameters") && require("nFactors")) { groups <- parameters::principal_components(mydf) plot_likert(mydf, groups = parameters::closest_component(groups)) } plot_likert(mydf, c(rep("B", 4), rep("A", 5)), sort.groups = FALSE, grid.range = c(0.9, 1.1), geom.colors = "RdBu", rel_heights = c(6, 8), wrap.labels = 40, reverse.scale = TRUE) # control legend items six_cat_example = data.frame( matrix(sample(1:6, 600, replace = TRUE), ncol = 6) ) \dontrun{ six_cat_example <- six_cat_example \%>\% dplyr::mutate_all(~ordered(.,labels = c("+++","++","+","-","--","---"))) # Old default plot_likert( six_cat_example, groups = c(1, 1, 1, 2, 2, 2), group.legend.options = list(nrow = 2, byrow = FALSE) ) # New default plot_likert(six_cat_example, groups = c(1, 1, 1, 2, 2, 2)) # Single row plot_likert( six_cat_example, groups = c(1, 1, 1, 2, 2, 2), group.legend.options = list(nrow = 1) )} } sjPlot/man/plot_models.Rd0000644000176200001440000002757414150124646015124 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_models.R \name{plot_models} \alias{plot_models} \title{Forest plot of multiple regression models} \usage{ plot_models( ..., transform = NULL, std.est = NULL, rm.terms = NULL, title = NULL, m.labels = NULL, legend.title = "Dependent Variables", legend.pval.title = "p-level", axis.labels = NULL, axis.title = NULL, axis.lim = NULL, wrap.title = 50, wrap.labels = 25, wrap.legend.title = 20, grid.breaks = NULL, dot.size = 3, line.size = NULL, value.size = NULL, spacing = 0.4, colors = "Set1", show.values = FALSE, show.legend = TRUE, show.intercept = FALSE, show.p = TRUE, p.shape = FALSE, p.threshold = c(0.05, 0.01, 0.001), p.adjust = NULL, ci.lvl = 0.95, robust = FALSE, vcov.fun = NULL, vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5"), vcov.args = NULL, vline.color = NULL, digits = 2, grid = FALSE, auto.label = TRUE, prefix.labels = c("none", "varname", "label") ) } \arguments{ \item{...}{One or more regression models, including glm's or mixed
models. May also be a \code{list} with fitted models. See 'Examples'.} \item{transform}{A character vector, naming a function that will be applied on estimates and confidence intervals. By default, \code{transform} will automatically use \code{"exp"} as transformation for applicable classes of \code{model} (e.g. logistic or poisson regression). Estimates of linear models remain untransformed. Use \code{NULL} if you want the raw, non-transformed estimates.} \item{std.est}{Choose whether standardized coefficients should be used for plotting. Default is no standardization (\code{std.est = NULL}). May be \code{"std"} for standardized beta values or \code{"std2"}, where standardization is done by rescaling estimates by dividing them by two standard deviations.} \item{rm.terms}{Character vector with names that indicate which terms should be removed from the plot. Counterpart to \code{terms}. \code{rm.terms = "t_name"} would remove the term \emph{t_name}. Default is \code{NULL}, i.e. all terms are used. For factors, levels that should be removed from the plot need to be explicitly indicated in square brackets, and match the model's coefficient names, e.g. \code{rm.terms = "t_name [2,3]"} would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} was categorical and has at least the factor levels \code{2} and \code{3}). Another example for the \emph{iris} dataset would be \code{rm.terms = "Species [versicolor,virginica]"}. Note that the \code{rm.terms}-argument does not apply to \emph{Marginal Effects} plots.} \item{title}{Character vector, used as plot title. By default, \code{\link[sjlabelled]{response_labels}} is called to retrieve the label of the dependent variable, which will be used as title. Use \code{title = ""} to remove title.} \item{m.labels}{Character vector, used to indicate the different models in the plot's legend. If not specified, the labels of the dependent variables for each model are used.} \item{legend.title}{Character vector, used as legend title for plots that have a legend.} \item{legend.pval.title}{Character vector, used as title of the plot legend that indicates the p-values. Default is \code{"p-level"}. Only applies if \code{p.shape = TRUE}.} \item{axis.labels}{Character vector with labels for the model terms, used as axis labels. By default, \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels of the coefficients, which will be used as axis labels. Use \code{axis.labels = ""} or \code{auto.label = FALSE} to use the variable names as labels instead. If \code{axis.labels} is a named vector, axis labels (by default, the names of the model's coefficients) will be matched with the names of \code{axis.labels}. This ensures that labels always match the related axis value, no matter in which way axis labels are sorted.} \item{axis.title}{Character vector of length one or two (depending on the plot function and type), used as title(s) for the x and y axis. If not specified, a default labelling is chosen. \strong{Note:} Some plot types may not support this argument sufficiently. In such cases, use the returned ggplot-object and add axis titles manually with \code{\link[ggplot2]{labs}}. Use \code{axis.title = ""} to remove axis titles.} \item{axis.lim}{Numeric vector of length 2, defining the range of the plot axis. Depending on plot-type, may affect either x- or y-axis.
For \emph{Marginal Effects} plots, \code{axis.lim} may also be a list of two vectors of length 2, defining axis limits for both the x and y axis.} \item{wrap.title}{Numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{grid.breaks}{Numeric value or vector; if \code{grid.breaks} is a single value, sets the distance between breaks for the axis at every \code{grid.breaks}'th position, where a major grid line is plotted. If \code{grid.breaks} is a vector, values will be used to define the axis positions of the major grid lines.} \item{dot.size}{Numeric, size of the dots that indicate the point estimates.} \item{line.size}{Numeric, size of the lines that indicate the error bars.} \item{value.size}{Numeric, indicates the size of value labels. Can be used for all plot types where the argument \code{show.values} is applicable, e.g. \code{value.size = 4}.} \item{spacing}{Numeric, spacing between the dots and error bars of the plotted fitted models. Default is 0.4.} \item{colors}{May be a character vector of color values in hex-format, valid color value names (see \code{demo("colors")}) or a name of a pre-defined color palette. Following options are valid for the \code{colors} argument: \itemize{ \item If not specified, a default color brewer palette will be used, which is suitable for the plot style. \item If \code{"gs"}, a greyscale will be used. \item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). \item If \code{colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. \item There are some pre-defined color palettes in this package, see \code{\link{sjPlot-themes}} for details. \item Else specify own color values or names as vector (e.g. \code{colors = "#00ff00"} or \code{colors = c("firebrick", "blue")}). }} \item{show.values}{Logical, whether values should be plotted or not.} \item{show.legend}{For \emph{Marginal Effects} plots, shows or hides the legend.} \item{show.intercept}{Logical, if \code{TRUE}, the intercept of the fitted model is also plotted. Default is \code{FALSE}. If \code{transform = "exp"}, please note that due to exponential transformation of estimates, the intercept in some cases is non-finite and the plot cannot be created.} \item{show.p}{Logical, adds asterisks that indicate the significance level of estimates to the value labels.} \item{p.shape}{Logical, if \code{TRUE}, significance levels are distinguished by different point shapes and a related legend is plotted. Default is \code{FALSE}.} \item{p.threshold}{Numeric vector of length 3, indicating the threshold for annotating p-values with asterisks. Only applies if \code{p.style = "asterisk"}.} \item{p.adjust}{Character vector, if not \code{NULL}, indicates the method to adjust p-values. See \code{\link[stats]{p.adjust}} for details.} \item{ci.lvl}{Numeric, the level of the confidence intervals (error bars). Use \code{ci.lvl = NA} to remove error bars.
For \code{stanreg}-models, \code{ci.lvl} defines the (outer) probability for the \emph{credible interval} that is plotted (see \code{\link[bayestestR]{ci}}). By default, \code{stanreg}-models are printed with two intervals: the "inner" interval, which defaults to the 50\%-CI; and the "outer" interval, which defaults to the 89\%-CI. \code{ci.lvl} affects only the outer interval in such cases. See \code{prob.inner} and \code{prob.outer} under the \code{...}-argument for more details.} \item{robust}{Logical, shortcut for arguments \code{vcov.fun} and \code{vcov.type}. If \code{TRUE}, uses \code{vcov.fun = "vcovHC"} and \code{vcov.type = "HC3"} as default, that is, \code{\link[sandwich]{vcovHC}} with default-type is called (see \code{\link[parameters]{standard_error_robust}} and \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} for further details).} \item{vcov.fun}{Character vector, indicating the name of the \code{vcov*()}-function from the \pkg{sandwich} or \pkg{clubSandwich} package, e.g. \code{vcov.fun = "vcovCL"}, if robust standard errors are required.} \item{vcov.type}{Character vector, specifying the estimation type for the robust covariance matrix estimation (see \code{\link[sandwich:vcovHC]{vcovHC()}} or \code{clubSandwich::vcovCR()} for details).} \item{vcov.args}{List of named vectors, used as additional arguments that are passed down to \code{vcov.fun}.} \item{vline.color}{Color of the vertical "zero effect" line. Default color is inherited from the current theme.} \item{digits}{Numeric, amount of digits after decimal point when rounding estimates or values.} \item{grid}{Logical, if \code{TRUE}, multiple plots are plotted as grid layout.} \item{auto.label}{Logical, if \code{TRUE} (the default), and \href{https://strengejacke.github.io/sjlabelled/articles/intro_sjlabelled.html}{data is labelled}, \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels of the coefficients, which will be used as predictor labels. If data is not labelled, \href{https://easystats.github.io/parameters/reference/format_parameters.html}{format_parameters()} is used to create pretty labels. If \code{auto.label = FALSE}, original variable names and value labels (factor levels) are used.} \item{prefix.labels}{Indicates whether the value labels of categorical variables should be prefixed, e.g. with the variable name or variable label. See argument \code{prefix} in \code{\link[sjlabelled]{term_labels}} for details.} } \value{ A ggplot-object. } \description{ Plot and compare regression coefficients with confidence intervals of multiple regression models in one plot. 
} \examples{ data(efc) # fit three models fit1 <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) fit2 <- lm(neg_c_7 ~ c160age + c12hour + c161sex + c172code, data = efc) fit3 <- lm(tot_sc_e ~ c160age + c12hour + c161sex + c172code, data = efc) # plot multiple models plot_models(fit1, fit2, fit3, grid = TRUE) # plot multiple models with legend labels and # point shapes instead of value labels plot_models( fit1, fit2, fit3, axis.labels = c( "Carer's Age", "Hours of Care", "Carer's Sex", "Educational Status" ), m.labels = c("Barthel Index", "Negative Impact", "Services used"), show.values = FALSE, show.p = FALSE, p.shape = TRUE ) \dontrun{ # plot multiple models from nested lists argument all.models <- list() all.models[[1]] <- fit1 all.models[[2]] <- fit2 all.models[[3]] <- fit3 plot_models(all.models) # plot multiple models with different predictors (stepwise inclusion), # standardized estimates fit1 <- lm(mpg ~ wt + cyl + disp + gear, data = mtcars) fit2 <- update(fit1, . ~ . + hp) fit3 <- update(fit2, . ~ . + am) plot_models(fit1, fit2, fit3, std.est = "std2") } } sjPlot/man/plot_xtab.Rd0000644000176200001440000002170013733137536014576 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_xtab.R \name{plot_xtab} \alias{plot_xtab} \title{Plot contingency tables} \usage{ plot_xtab( x, grp, type = c("bar", "line"), margin = c("col", "cell", "row"), bar.pos = c("dodge", "stack"), title = "", title.wtd.suffix = NULL, axis.titles = NULL, axis.labels = NULL, legend.title = NULL, legend.labels = NULL, weight.by = NULL, rev.order = FALSE, show.values = TRUE, show.n = TRUE, show.prc = TRUE, show.total = TRUE, show.legend = TRUE, show.summary = FALSE, summary.pos = "r", drop.empty = TRUE, string.total = "Total", wrap.title = 50, wrap.labels = 15, wrap.legend.title = 20, wrap.legend.labels = 20, geom.size = 0.7, geom.spacing = 0.1, geom.colors = "Paired", dot.size = 3, smooth.lines = FALSE, grid.breaks = 0.2, expand.grid = FALSE, ylim = NULL, vjust = "bottom", hjust = "center", y.offset = NULL, coord.flip = FALSE ) } \arguments{ \item{x}{A vector of values (variable) describing the bars which make up the plot.} \item{grp}{Grouping variable of same length as \code{x}, where \code{x} is grouped into the categories represented by \code{grp}.} \item{type}{Plot type. may be either \code{"bar"} (default) for bar charts, or \code{"line"} for line diagram.} \item{margin}{Indicates which data of the proportional table should be plotted. Use \code{"row"} for calculating row percentages, \code{"col"} for column percentages and \code{"cell"} for cell percentages. If \code{margin = "col"}, an additional bar with the total sum of each column can be added to the plot (see \code{show.total}).} \item{bar.pos}{Indicates whether bars should be positioned side-by-side (default), or stacked (\code{bar.pos = "stack"}). May be abbreviated.} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{title.wtd.suffix}{Suffix (as string) for the title, if \code{weight.by} is specified, e.g. \code{title.wtd.suffix=" (weighted)"}. 
Default is \code{NULL}, so title will not have a suffix when cases are weighted.} \item{axis.titles}{character vector of length one or two, defining the title(s) for the x-axis and y-axis.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{legend.title}{character vector, used as title for the plot legend.} \item{legend.labels}{character vector with labels for the guide/legend.} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of same length as the input vector. Default is \code{NULL}, so no weights are used.} \item{rev.order}{Logical, if \code{TRUE}, order of categories (groups) is reversed.} \item{show.values}{Logical, whether values should be plotted or not.} \item{show.n}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} \item{show.prc}{logical, if \code{TRUE} (default), percentage values are plotted to each bar. If \code{FALSE}, percentage values are removed.} \item{show.total}{When \code{margin = "col"}, an additional bar with the sum within each category and its percentages will be added to each category.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{show.summary}{logical, if \code{TRUE} (default), a summary with chi-squared statistics (see \code{\link{chisq.test}}), Cramer's V or Phi-value etc. is shown. If a cell contains expected values lower than five (or lower than 10 if df is 1), the Fisher's exact test (see \code{\link{fisher.test}}) is computed instead of the chi-squared test. If the table's matrix is larger than 2x2, Fisher's exact test with Monte Carlo simulation is computed.} \item{summary.pos}{position of the model summary which is printed when \code{show.summary} is \code{TRUE}. Default is \code{"r"}, i.e. it's printed to the upper right corner. Use \code{"l"} for upper left corner.} \item{drop.empty}{Logical, if \code{TRUE} and the variable's values are labeled, values that have no observations are still printed in the table (with frequency \code{0}). If \code{FALSE}, values / factor levels with no occurrence in the data are omitted from the output.} \item{string.total}{String for the legend label when a total-column is added. Only applies if \code{show.total = TRUE}. Default is \code{"Total"}.} \item{wrap.title}{numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{wrap.legend.labels}{numeric, determines how many chars of the legend labels are displayed in one line and when a line break is inserted.} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{geom.spacing}{the spacing between geoms (i.e. bar spacing)} \item{geom.colors}{user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{dot.size}{Dot size, only applies, when argument \code{type = "line"}.} \item{smooth.lines}{prints a smooth line curve.
Only applies, when argument \code{type = "line"}.} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid is being printed.} \item{expand.grid}{logical, if \code{TRUE}, the plot grid is expanded, i.e. there is a small margin between axes and plotting region. Default is \code{FALSE}.} \item{ylim}{numeric vector of length two, defining lower and upper axis limits of the y scale. By default, this argument is set to \code{NULL}, i.e. the y-axis fits to the required range of the data.} \item{vjust}{character vector, indicating the vertical position of value labels. Allowed are same values as for \code{vjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} \item{hjust}{character vector, indicating the horizontal position of value labels. Allowed are same values as for \code{vjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} \item{y.offset}{numeric, offset for text labels when their alignment is adjusted to the top/bottom of the geom (see \code{hjust} and \code{vjust}).} \item{coord.flip}{logical, if \code{TRUE}, the x and y axis are swapped.} } \value{ A ggplot-object. } \description{ Plot proportional crosstables (contingency tables) of two variables as a ggplot diagram. } \examples{ # create 4-category-items grp <- sample(1:4, 100, replace = TRUE) # create 3-category-items x <- sample(1:3, 100, replace = TRUE) # plot "cross tabulation" of x and grp plot_xtab(x, grp) # plot "cross tabulation" of x and grp, including labels plot_xtab(x, grp, axis.labels = c("low", "mid", "high"), legend.labels = c("Grp 1", "Grp 2", "Grp 3", "Grp 4")) # plot "cross tabulation" of x and grp # as stacked proportional bars plot_xtab(x, grp, margin = "row", bar.pos = "stack", show.summary = TRUE, coord.flip = TRUE) # example with vertical labels library(sjmisc) library(sjlabelled) data(efc) set_theme(geom.label.angle = 90) plot_xtab(efc$e42dep, efc$e16sex, vjust = "center", hjust = "bottom") # grouped bars with EUROFAMCARE sample dataset # dataset was imported from an SPSS-file, # see ?sjmisc::read_spss data(efc) efc.val <- get_labels(efc) efc.var <- get_label(efc) plot_xtab(efc$e42dep, efc$e16sex, title = efc.var['e42dep'], axis.labels = efc.val[['e42dep']], legend.title = efc.var['e16sex'], legend.labels = efc.val[['e16sex']]) plot_xtab(efc$e16sex, efc$e42dep, title = efc.var['e16sex'], axis.labels = efc.val[['e16sex']], legend.title = efc.var['e42dep'], legend.labels = efc.val[['e42dep']]) # ------------------------------- # auto-detection of labels works here # so no need to specify labels.
For # title-auto-detection, use NULL # ------------------------------- plot_xtab(efc$e16sex, efc$e42dep, title = NULL) plot_xtab(efc$e16sex, efc$e42dep, margin = "row", bar.pos = "stack", coord.flip = TRUE) } sjPlot/man/plot_stackfrq.Rd0000644000176200001440000001172613567425621015461 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_stackfrq.R \name{plot_stackfrq} \alias{plot_stackfrq} \title{Plot stacked proportional bars} \usage{ plot_stackfrq( items, title = NULL, legend.title = NULL, legend.labels = NULL, axis.titles = NULL, axis.labels = NULL, weight.by = NULL, sort.frq = NULL, wrap.title = 50, wrap.labels = 30, wrap.legend.title = 30, wrap.legend.labels = 28, geom.size = 0.5, geom.colors = "Blues", show.prc = TRUE, show.n = FALSE, show.total = TRUE, show.axis.prc = TRUE, show.legend = TRUE, grid.breaks = 0.2, expand.grid = FALSE, digits = 1, vjust = "center", coord.flip = TRUE ) } \arguments{ \item{items}{Data frame, or a grouped data frame, with each column representing one item.} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{legend.title}{character vector, used as title for the plot legend.} \item{legend.labels}{character vector with labels for the guide/legend.} \item{axis.titles}{character vector of length one or two, defining the title(s) for the x-axis and y-axis.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of same length as the input vector. Default is \code{NULL}, so no weights are used.} \item{sort.frq}{Indicates whether the \code{items} should be ordered by highest count of first or last category of \code{items}. \describe{ \item{\code{"first.asc"}}{to order ascending by lowest count of first category,} \item{\code{"first.desc"}}{to order descending by lowest count of first category,} \item{\code{"last.asc"}}{to order ascending by lowest count of last category,} \item{\code{"last.desc"}}{to order descending by lowest count of last category,} \item{\code{NULL}}{(default) for no sorting.} }} \item{wrap.title}{numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{wrap.legend.labels}{numeric, determines how many chars of the legend labels are displayed in one line and when a line break is inserted.} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{geom.colors}{user defined color for geoms.
See 'Details' in \code{\link{plot_grpfrq}}.} \item{show.prc}{Logical, whether percentage values should be plotted or not.} \item{show.n}{Logical, whether count values hould be plotted or not.} \item{show.total}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} \item{show.axis.prc}{Logical, if \code{TRUE} (default), the percentage values at the x-axis are shown.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid is being printed.} \item{expand.grid}{logical, if \code{TRUE}, the plot grid is expanded, i.e. there is a small margin between axes and plotting region. Default is \code{FALSE}.} \item{digits}{Numeric, amount of digits after decimal point when rounding estimates or values.} \item{vjust}{character vector, indicating the vertical position of value labels. Allowed are same values as for \code{vjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} \item{coord.flip}{logical, if \code{TRUE}, the x and y axis are swapped.} } \value{ A ggplot-object. } \description{ Plot items (variables) of a scale as stacked proportional bars. This function is useful when several items with identical scale/categoroies should be plotted to compare the distribution of answers. } \examples{ # Data from the EUROFAMCARE sample dataset library(sjmisc) data(efc) # recveive first item of COPE-index scale start <- which(colnames(efc) == "c82cop1") # recveive first item of COPE-index scale end <- which(colnames(efc) == "c90cop9") # auto-detection of labels plot_stackfrq(efc[, start:end]) # works on grouped data frames as well library(dplyr) efc \%>\% group_by(c161sex) \%>\% select(start:end) \%>\% plot_stackfrq() } sjPlot/man/sjp.chi2.Rd0000644000176200001440000000445513733137536014232 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotPearsonsChi2Test.R \name{sjp.chi2} \alias{sjp.chi2} \title{Plot Pearson's Chi2-Test of multiple contingency tables} \usage{ sjp.chi2( df, title = "Pearson's Chi2-Test of Independence", axis.labels = NULL, wrap.title = 50, wrap.labels = 20, show.legend = FALSE, legend.title = NULL ) } \arguments{ \item{df}{A data frame with (dichotomous) factor variables.} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{wrap.title}{numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{legend.title}{character vector, used as title for the plot legend.} } \value{ A ggplot-object. 
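Looking back at \code{plot_stackfrq()} documented above, a small hedged sketch (not part of its original examples) that combines the sorting and labelling arguments; it assumes the efc data and the COPE-index items used in the examples, and that a valid color brewer palette name is passed to \code{geom.colors}.

library(sjmisc)
data(efc)
# first and last item of the COPE-index scale
start <- which(colnames(efc) == "c82cop1")
end <- which(colnames(efc) == "c90cop9")
# sort items by the first category and show counts instead of percentages
plot_stackfrq(efc[, start:end],
              sort.frq = "first.desc",
              show.prc = FALSE,
              show.n = TRUE,
              geom.colors = "BuGn")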
} \description{ Plot p-values of Pearson's Chi2-tests for multiple contingency tables as ellipses or tiles. Requires a data frame with dichotomous (dummy) variables. Calculation of Chi2-matrix taken from \href{https://talesofr.wordpress.com/2013/05/05/ridiculously-photogenic-factors-heatmap-with-p-values/}{Tales of R}. } \examples{ # create data frame with 5 dichotomous (dummy) variables mydf <- data.frame(as.factor(sample(1:2, 100, replace=TRUE)), as.factor(sample(1:2, 100, replace=TRUE)), as.factor(sample(1:2, 100, replace=TRUE)), as.factor(sample(1:2, 100, replace=TRUE)), as.factor(sample(1:2, 100, replace=TRUE))) # create variable labels items <- list(c("Item 1", "Item 2", "Item 3", "Item 4", "Item 5")) # plot Chi2-contingency-table sjp.chi2(mydf, axis.labels = items) } sjPlot/man/sjplot.Rd0000644000176200001440000000602713611404504014105 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjplot.R \name{sjplot} \alias{sjplot} \alias{sjtab} \title{Wrapper to create plots and tables within a pipe-workflow} \usage{ sjplot(data, ..., fun = c("grpfrq", "xtab", "aov1", "likert")) sjtab(data, ..., fun = c("xtab", "stackfrq")) } \arguments{ \item{data}{A data frame. May also be a grouped data frame (see 'Note' and 'Examples').} \item{...}{Names of variables that should be plotted, and also further arguments passed down to the \pkg{sjPlot}-functions. See 'Examples'.} \item{fun}{Plotting function. Refers to the function name of \pkg{sjPlot}-functions. See 'Details' and 'Examples'.} } \value{ See related sjp. and sjt.-functions. } \description{ This function has a pipe-friendly argument-structure, with the first argument always being the data, followed by variables that should be plotted or printed as table. The function then transforms the input and calls the requested sjp.- resp. sjt.-function to create a plot or table. \cr \cr Both \code{sjplot()} and \code{sjtab()} support grouped data frames. } \details{ Following \code{fun}-values are currently supported: \describe{ \item{\code{"aov1"}}{calls \code{\link{sjp.aov1}}. The first two variables in \code{data} are used (and required) to create the plot. } \item{\code{"grpfrq"}}{calls \code{\link{plot_grpfrq}}. The first two variables in \code{data} are used (and required) to create the plot. } \item{\code{"likert"}}{calls \code{\link{plot_likert}}. \code{data} must be a data frame with items to plot. } \item{\code{"stackfrq"}}{calls \code{\link{tab_stackfrq}}. \code{data} must be a data frame with items to create the table. } \item{\code{"xtab"}}{calls \code{\link{plot_xtab}} or \code{\link{tab_xtab}}. The first two variables in \code{data} are used (and required) to create the plot or table. } } } \note{ The \code{...}-argument is used, first, to specify the variables from \code{data} that should be plotted, and, second, to name further arguments that are used in the subsequent plotting functions. Refer to the online-help of supported plotting-functions to see valid arguments. \cr \cr \code{data} may also be a grouped data frame (see \code{\link[dplyr]{group_by}}) with up to two grouping variables. Plots are created for each subgroup then. 
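As a brief sketch of the note above (not one of the documented examples that follow), a grouped data frame can be passed directly to \code{sjplot()}, so that one plot per subgroup is returned; this assumes the efc data used throughout the package examples.

library(dplyr)
data(efc)
# one grouped frequency plot per level of e16sex
efc \%>\%
  group_by(e16sex) \%>\%
  sjplot(e42dep, c172code, fun = "grpfrq")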
} \examples{ library(dplyr) data(efc) # Grouped frequencies efc \%>\% sjplot(e42dep, c172code, fun = "grpfrq") # Grouped frequencies, as box plots efc \%>\% sjplot(e17age, c172code, fun = "grpfrq", type = "box", geom.colors = "Set1") \dontrun{ # table output of grouped data frame efc \%>\% group_by(e16sex, c172code) \%>\% select(e42dep, n4pstu, e16sex, c172code) \%>\% sjtab(fun = "xtab", use.viewer = FALSE) # open all tables in browser} } sjPlot/man/tab_pca.Rd0000644000176200001440000001343014051650703014162 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_pca.R \name{tab_pca} \alias{tab_pca} \title{Summary of principal component analysis as HTML table} \usage{ tab_pca( data, rotation = c("varimax", "quartimax", "promax", "oblimin", "simplimax", "cluster", "none"), nmbr.fctr = NULL, fctr.load.tlrn = 0.1, title = "Principal Component Analysis", var.labels = NULL, wrap.labels = 40, show.cronb = TRUE, show.msa = FALSE, show.var = FALSE, alternate.rows = FALSE, digits = 2, string.pov = "Proportion of Variance", string.cpov = "Cumulative Proportion", CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE ) } \arguments{ \item{data}{A data frame that should be used to compute a PCA, or a \code{\link{prcomp}} object.} \item{rotation}{Rotation of the factor loadings. May be one of \code{"varimax", "quartimax", "promax", "oblimin", "simplimax", "cluster"} or \code{"none"}.} \item{nmbr.fctr}{Number of factors used for calculating the rotation. By default, this value is \code{NULL} and the amount of factors is calculated according to the Kaiser-criteria.} \item{fctr.load.tlrn}{Specifies the minimum difference a variable needs to have between factor loadings (components) in order to indicate a clear loading on just one factor and not diffusing over all factors. For instance, a variable with 0.8, 0.82 and 0.84 factor loading on 3 possible factors can not be clearly assigned to just one factor and thus would be removed from the principal component analysis. By default, the minimum difference of loading values between the highest and 2nd highest factor should be 0.1} \item{title}{String, will be used as table caption.} \item{var.labels}{Character vector with variable names, which will be used to label variables in the output.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{show.cronb}{Logical, if \code{TRUE} (default), the cronbach's alpha value for each factor scale will be calculated, i.e. all variables with the highest loading for a factor are taken for the reliability test. The result is an alpha value for each factor dimension. Only applies when \code{data} is a data frame.} \item{show.msa}{Logical, if \code{TRUE}, shows an additional column with the measure of sampling adequacy according dor each component.} \item{show.var}{Logical, if \code{TRUE}, the proportions of variances for each component as well as cumulative variance are shown in the table footer.} \item{alternate.rows}{Logical, if \code{TRUE}, rows are printed in alternatig colors (white and light grey by default).} \item{digits}{Amount of decimals for estimates} \item{string.pov}{String for the table row that contains the proportions of variances. By default, \emph{"Proportion of Variance"} will be used.} \item{string.cpov}{String for the table row that contains the cumulative variances. 
By default, \emph{"Cumulative Proportion"} will be used.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parantheses for html-tags. The html-source may look less pretty, but it may help when exporting html-tables to office tools.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}), \item the html-table with inline-css for use with knitr (\code{knitr}), \item the \code{factor.index}, i.e. the column index of each variable with the highest factor loading for each factor and \item the \code{removed.items}, i.e. which variables have been removed because they were outside of the \code{fctr.load.tlrn}'s range. } for further use. } \description{ Performes a principle component analysis on a data frame or matrix (with varimax or oblimin rotation) and displays the factor solution as HTML table, or saves them as file. \cr \cr In case a data frame is used as parameter, the Cronbach's Alpha value for each factor scale will be calculated, i.e. all variables with the highest loading for a factor are taken for the reliability test. The result is an alpha value for each factor dimension. } \examples{ \dontrun{ # Data from the EUROFAMCARE sample dataset library(sjmisc) data(efc) # recveive first item of COPE-index scale start <- which(colnames(efc) == "c82cop1") # recveive last item of COPE-index scale end <- which(colnames(efc) == "c90cop9") # auto-detection of labels if (interactive()) { tab_pca(efc[, start:end]) }} } sjPlot/man/dist_norm.Rd0000644000176200001440000000365313567425621014607 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotDist.R \name{dist_norm} \alias{dist_norm} \title{Plot normal distributions} \usage{ dist_norm( norm = NULL, mean = 0, sd = 1, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7 ) } \arguments{ \item{norm}{Numeric, optional. If specified, a normal distribution with \code{mean} and \code{sd} is plotted and a shaded area at \code{norm} value position is plotted that indicates whether or not the specified value is significant or not. If both \code{norm} and \code{p} are not specified, a distribution without shaded area is plotted.} \item{mean}{Numeric. Mean value for normal distribution. By default 0.} \item{sd}{Numeric. Standard deviation for normal distribution. By default 1.} \item{p}{Numeric, optional. 
If specified, a normal distribution with \code{mean} and \code{sd} is plotted and a shaded area at the position where the specified p-level starts is plotted. If both \code{norm} and \code{p} are not specified, a distribution without shaded area is plotted.} \item{xmax}{Numeric, optional. Specifies the maximum x-axis-value. If not specified, the x-axis ranges to a value where a p-level of 0.00001 is reached.} \item{geom.colors}{user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{geom.alpha}{Specifies the alpha-level of the shaded area. Default is 0.7, range between 0 to 1.} } \description{ This function plots a simple normal distribution or a normal distribution with shaded areas that indicate at which value a significant p-level is reached. } \examples{ # a simple normal distribution dist_norm() # a simple normal distribution with different mean and sd. # note that curve looks similar to above plot, but axis range # has changed. dist_norm(mean = 2, sd = 4) # a simple normal distribution dist_norm(norm = 1) # a simple normal distribution dist_norm(p = 0.2) } sjPlot/man/save_plot.Rd0000644000176200001440000000350213567425621014576 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/save_plot.R \name{save_plot} \alias{save_plot} \title{Save ggplot-figure for print publication} \usage{ save_plot( filename, fig = last_plot(), width = 12, height = 9, dpi = 300, theme = theme_get(), label.color = "black", label.size = 2.4, axis.textsize = 0.8, axis.titlesize = 0.75, legend.textsize = 0.6, legend.titlesize = 0.65, legend.itemsize = 0.5 ) } \arguments{ \item{filename}{Name of the output file; filename must end with one of the following accepted file types: ".png", ".jpg", ".svg" or ".tif".} \item{fig}{The plot that should be saved. By default, the last plot is saved.} \item{width}{Width of the figure, in centimetres.} \item{height}{Height of the figure, in centimetres.} \item{dpi}{Resolution in dpi (dots per inch). Ignored for vector formats, such as ".svg".} \item{theme}{The default theme to use when saving the plot.} \item{label.color}{Color value for labels (axis, plot, etc.).} \item{label.size}{Fontsize of value labels inside plot area.} \item{axis.textsize}{Fontsize of axis labels.} \item{axis.titlesize}{Fontsize of axis titles.} \item{legend.textsize}{Fontsize of legend labels.} \item{legend.titlesize}{Fontsize of legend title.} \item{legend.itemsize}{Size of legend's item (legend key), in centimetres.} } \description{ Convenient function to save the last ggplot-figure in high quality for publication. } \note{ This is a convenient function with some default settings that should come close to most of the needs for fontsize and scaling in figures when saving them for printing or publishing. It uses cairographics anti-aliasing (see \code{\link[grDevices]{png}}). \cr \cr For adjusting plot appearance, see also \code{\link{sjPlot-themes}}. 
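A minimal usage sketch for \code{save_plot()} (the file name is only an example; width, height and dpi repeat the documented defaults):

library(ggplot2)
p <- ggplot(mtcars, aes(hp, mpg)) + geom_point()
# save the figure as PNG, 12 x 9 cm at 300 dpi
save_plot("my_figure.png", fig = p, width = 12, height = 9, dpi = 300)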
} sjPlot/man/plot_scatter.Rd0000644000176200001440000001317213776334350015311 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_scatter.R \name{plot_scatter} \alias{plot_scatter} \title{Plot (grouped) scatter plots} \usage{ plot_scatter( data, x, y, grp, title = "", legend.title = NULL, legend.labels = NULL, dot.labels = NULL, axis.titles = NULL, dot.size = 1.5, label.size = 3, colors = "metro", fit.line = NULL, fit.grps = NULL, show.rug = FALSE, show.legend = TRUE, show.ci = FALSE, wrap.title = 50, wrap.legend.title = 20, wrap.legend.labels = 20, jitter = 0.05, emph.dots = FALSE, grid = FALSE ) } \arguments{ \item{data}{A data frame, or a grouped data frame.} \item{x}{Name of the variable for the x-axis.} \item{y}{Name of the variable for the y-axis.} \item{grp}{Optional, name of the grouping-variable. If not missing, the scatter plot will be grouped. See 'Examples'.} \item{title}{Character vector, used as plot title. By default, \code{\link[sjlabelled]{response_labels}} is called to retrieve the label of the dependent variable, which will be used as title. Use \code{title = ""} to remove title.} \item{legend.title}{Character vector, used as legend title for plots that have a legend.} \item{legend.labels}{character vector with labels for the guide/legend.} \item{dot.labels}{Character vector with names for each coordinate pair given by \code{x} and \code{y}, so text labels are added to the plot. Must be of same length as \code{x} and \code{y}. If \code{dot.labels} has a different length, data points will be trimmed to match \code{dot.labels}. If \code{dot.labels = NULL} (default), no labels are printed.} \item{axis.titles}{character vector of length one or two, defining the title(s) for the x-axis and y-axis.} \item{dot.size}{Numeric, size of the dots that indicate the point estimates.} \item{label.size}{Size of text labels if argument \code{dot.labels} is used.} \item{colors}{May be a character vector of color values in hex-format, valid color value names (see \code{demo("colors")}) or a name of a pre-defined color palette. Following options are valid for the \code{colors} argument: \itemize{ \item If not specified, a default color brewer palette will be used, which is suitable for the plot style. \item If \code{"gs"}, a greyscale will be used. \item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). \item If \code{colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. \item There are some pre-defined color palettes in this package, see \code{\link{sjPlot-themes}} for details. \item Else specify own color values or names as vector (e.g. \code{colors = "#00ff00"} or \code{colors = c("firebrick", "blue")}). }} \item{fit.line, fit.grps}{Specifies the method to add a fitted line accross the data points. Possible values are for instance \code{"lm"}, \code{"glm"}, \code{"loess"} or \code{"auto"}. If \code{NULL}, no line is plotted. 
\code{fit.line} adds a fitted line for the complete data, while \code{fit.grps} adds a fitted line for each subgroup of \code{grp}.} \item{show.rug}{Logical, if \code{TRUE}, a marginal rug plot is displayed in the graph.} \item{show.legend}{For \emph{Marginal Effects} plots, shows or hides the legend.} \item{show.ci}{Logical, if \code{TRUE)}, adds notches to the box plot, which are used to compare groups; if the notches of two boxes do not overlap, medians are considered to be significantly different.} \item{wrap.title}{Numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{wrap.legend.labels}{numeric, determines how many chars of the legend labels are displayed in one line and when a line break is inserted.} \item{jitter}{Numeric, between 0 and 1. If \code{show.data = TRUE}, you can add a small amount of random variation to the location of each data point. \code{jitter} then indicates the width, i.e. how much of a bin's width will be occupied by the jittered values.} \item{emph.dots}{Logical, if \code{TRUE}, overlapping points at same coordinates will be becomme larger, so point size indicates amount of overlapping.} \item{grid}{Logical, if \code{TRUE}, multiple plots are plotted as grid layout.} } \value{ A ggplot-object. For grouped data frames, a list of ggplot-objects for each group in the data. } \description{ Display scatter plot of two variables. Adding a grouping variable to the scatter plot is possible. Furthermore, fitted lines can be added for each group as well as for the overall plot. } \examples{ # load sample date library(sjmisc) library(sjlabelled) data(efc) # simple scatter plot plot_scatter(efc, e16sex, neg_c_7) # simple scatter plot, increased jittering plot_scatter(efc, e16sex, neg_c_7, jitter = .4) # grouped scatter plot plot_scatter(efc, c160age, e17age, e42dep) # grouped scatter plot with marginal rug plot # and add fitted line for complete data plot_scatter( efc, c12hour, c160age, c172code, show.rug = TRUE, fit.line = "lm" ) # grouped scatter plot with marginal rug plot # and add fitted line for each group plot_scatter( efc, c12hour, c160age, c172code, show.rug = TRUE, fit.grps = "loess", grid = TRUE ) } sjPlot/man/dist_t.Rd0000644000176200001440000000363613567425621014100 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotDist.R \name{dist_t} \alias{dist_t} \title{Plot t-distributions} \usage{ dist_t( t = NULL, deg.f = NULL, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7 ) } \arguments{ \item{t}{Numeric, optional. If specified, a t-distribution with \code{deg.f} degrees of freedom is plotted and a shaded area at \code{t} value position is plotted that indicates whether or not the specified value is significant or not. If both \code{t} and \code{p} are not specified, a distribution without shaded area is plotted.} \item{deg.f}{Numeric. The degrees of freedom for the t-distribution. Needs to be specified.} \item{p}{Numeric, optional. If specified, a t-distribution with \code{deg.f} degrees of freedom is plotted and a shaded area at the position where the specified p-level starts is plotted. If both \code{t} and \code{p} are not specified, a distribution without shaded area is plotted.} \item{xmax}{Numeric, optional. Specifies the maximum x-axis-value. 
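A further hedged sketch for \code{plot_scatter()} (not part of its original examples): adding text labels to the points via \code{dot.labels}, using a small simulated data set; the labels must have the same length as the plotted coordinates.

set.seed(123)
d <- data.frame(x = rnorm(10), y = rnorm(10))
# one label per coordinate pair
plot_scatter(d, x, y,
             dot.labels = LETTERS[1:10],
             dot.size = 2,
             colors = "metro")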
If not specified, the x-axis ranges to a value where a p-level of 0.00001 is reached.} \item{geom.colors}{user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{geom.alpha}{Specifies the alpha-level of the shaded area. Default is 0.7, range between 0 to 1.} } \description{ This function plots a simple t-distribution or a t-distribution with shaded areas that indicate at which t-value a significant p-level is reached. } \examples{ # a simple t-distribution # for 6 degrees of freedom dist_t(deg.f = 6) # a t-distribution for 6 degrees of freedom, # and a shaded area starting at t-value of one. # With a df of 6, a t-value of 1.94 would be "significant". dist_t(t = 1, deg.f = 6) # a t-distribution for 6 degrees of freedom, # and a shaded area starting at p-level of 0.4 # (t-value of about 0.26). dist_t(p = 0.4, deg.f = 6) } sjPlot/man/plot_kfold_cv.Rd0000644000176200001440000000456014136600651015423 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_kfold_cv.R \name{plot_kfold_cv} \alias{plot_kfold_cv} \title{Plot model fit from k-fold cross-validation} \usage{ plot_kfold_cv(data, formula, k = 5, fit) } \arguments{ \item{data}{A data frame, used to split the data into \code{k} training-test-pairs.} \item{formula}{A model formula, used to fit linear models (\code{\link[stats]{lm}}) over all \code{k} training data sets. Use \code{fit} to specify a fitted model (also other models than linear models), which will be used to compute cross validation. If \code{fit} is not missing, \code{formula} will be ignored.} \item{k}{Number of folds.} \item{fit}{Model object, which will be used to compute cross validation. If \code{fit} is not missing, \code{formula} will be ignored. Currently, only linear, poisson and negative binomial regression models are supported.} } \description{ This function plots the aggregated residuals of k-fold cross-validated models against the outcome. This allows to evaluate how the model performs according over- or underestimation of the outcome. } \details{ This function, first, generates \code{k} cross-validated test-training pairs and fits the same model, specified in the \code{formula}- or \code{fit}- argument, over all training data sets. \cr \cr Then, the test data is used to predict the outcome from all models that have been fit on the training data, and the residuals from all test data is plotted against the observed values (outcome) from the test data (note: for poisson or negative binomial models, the deviance residuals are calculated). This plot can be used to validate the model and see, whether it over- (residuals > 0) or underestimates (residuals < 0) the model's outcome. } \note{ Currently, only linear, poisson and negative binomial regression models are supported. } \examples{ data(efc) plot_kfold_cv(efc, neg_c_7 ~ e42dep + c172code + c12hour) plot_kfold_cv(mtcars, mpg ~.) # for poisson models. 
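# ------------------------------------------------------------------
# hedged sketch (not part of the original examples): roughly what the
# k-fold procedure described in 'Details' does "by hand" for a linear
# model -- split the data into folds, fit on the training folds and
# plot the test residuals against the observed outcome
set.seed(123)
k <- 5
folds <- sample(rep(1:k, length.out = nrow(mtcars)))
res <- lapply(1:k, function(i) {
  train <- mtcars[folds != i, ]
  test  <- mtcars[folds == i, ]
  m <- lm(mpg ~ ., data = train)
  data.frame(observed = test$mpg,
             residual = test$mpg - predict(m, newdata = test))
})
res <- do.call(rbind, res)
plot(res$observed, res$residual,
     xlab = "observed mpg", ylab = "test residual")
abline(h = 0, lty = 2)
# ------------------------------------------------------------------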
need to fit a model and use 'fit'-argument fit <- glm(tot_sc_e ~ neg_c_7 + c172code, data = efc, family = poisson) plot_kfold_cv(efc, fit = fit) # and for negative binomial models fit <- MASS::glm.nb(tot_sc_e ~ neg_c_7 + c172code, data = efc) plot_kfold_cv(efc, fit = fit) } sjPlot/man/tab_fa.Rd0000644000176200001440000001375214147747730014031 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_fa.R \name{tab_fa} \alias{tab_fa} \title{Summary of factor analysis as HTML table} \usage{ tab_fa( data, rotation = "promax", method = c("ml", "minres", "wls", "gls", "pa", "minchi", "minrank"), nmbr.fctr = NULL, fctr.load.tlrn = 0.1, sort = FALSE, title = "Factor Analysis", var.labels = NULL, wrap.labels = 40, show.cronb = TRUE, show.comm = FALSE, alternate.rows = FALSE, digits = 2, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE ) } \arguments{ \item{data}{A data frame that should be used to compute a PCA, or a \code{\link{prcomp}} object.} \item{rotation}{Rotation of the factor loadings. May be one of \code{"varimax", "quartimax", "promax", "oblimin", "simplimax", "cluster"} or \code{"none"}.} \item{method}{the factoring method to be used. \code{"ml"} will do a maximum likelihood factor analysis (default). \code{"minres"} will do a minimum residual (OLS), \code{"wls"} will do a weighted least squares (WLS) solution, \code{"gls"} does a generalized weighted least squares (GLS), \code{"pa"} will do the principal factor solution, \code{"minchi"} will minimize the sample size weighted chi square when treating pairwise correlations with different number of subjects per pair. \code{"minrank"} will do a minimum rank factor analysis.} \item{nmbr.fctr}{Number of factors used for calculating the rotation. By default, this value is \code{NULL} and the amount of factors is calculated according to the Kaiser-criteria.} \item{fctr.load.tlrn}{Specifies the minimum difference a variable needs to have between factor loadings (components) in order to indicate a clear loading on just one factor and not diffusing over all factors. For instance, a variable with 0.8, 0.82 and 0.84 factor loading on 3 possible factors can not be clearly assigned to just one factor and thus would be removed from the principal component analysis. By default, the minimum difference of loading values between the highest and 2nd highest factor should be 0.1} \item{sort}{logical, if \code{TRUE}, sort the loadings for each factors (items will be sorted in terms of their greatest loading, in descending order)} \item{title}{String, will be used as table caption.} \item{var.labels}{Character vector with variable names, which will be used to label variables in the output.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{show.cronb}{Logical, if \code{TRUE} (default), the cronbach's alpha value for each factor scale will be calculated, i.e. all variables with the highest loading for a factor are taken for the reliability test. The result is an alpha value for each factor dimension. 
Only applies when \code{data} is a data frame.} \item{show.comm}{Logical, if \code{TRUE}, show the communality column in the table.} \item{alternate.rows}{Logical, if \code{TRUE}, rows are printed in alternatig colors (white and light grey by default).} \item{digits}{Amount of decimals for estimates} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parantheses for html-tags. The html-source may look less pretty, but it may help when exporting html-tables to office tools.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}), \item the html-table with inline-css for use with knitr (\code{knitr}), \item the \code{factor.index}, i.e. the column index of each variable with the highest factor loading for each factor and \item the \code{removed.items}, i.e. which variables have been removed because they were outside of the \code{fctr.load.tlrn}'s range. } for further use. } \description{ Performs a factor analysis on a data frame or matrix and displays the factors as HTML table, or saves them as file. \cr \cr In case a data frame is used as parameter, the Cronbach's Alpha value for each factor scale will be calculated, i.e. all variables with the highest loading for a factor are taken for the reliability test. The result is an alpha value for each factor dimension. } \note{ This method for factor analysis relies on the functions \code{\link[psych]{fa}} and \code{\link[psych]{fa.parallel}} from the psych package. 
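A small additional sketch for \code{tab_fa()} (not part of the examples that follow), fixing the number of factors and the estimation method; it assumes the psych and GPArotation packages are available and again uses the efc COPE-index items.

library(sjmisc)
data(efc)
start <- which(colnames(efc) == "c82cop1")
end <- which(colnames(efc) == "c90cop9")
if (interactive()) {
  tab_fa(efc[, start:end],
         method = "pa",      # principal factor solution
         nmbr.fctr = 2,      # fix number of factors instead of Kaiser criterion
         sort = TRUE)        # sort items by their highest loading
}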
} \examples{ \dontrun{ # Data from the EUROFAMCARE sample dataset library(sjmisc) library(GPArotation) data(efc) # recveive first item of COPE-index scale start <- which(colnames(efc) == "c82cop1") # recveive last item of COPE-index scale end <- which(colnames(efc) == "c90cop9") # auto-detection of labels if (interactive()) { tab_fa(efc[, start:end]) }} } sjPlot/man/plot_frq.Rd0000644000176200001440000002461513776334350014440 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_frq.R \name{plot_frq} \alias{plot_frq} \title{Plot frequencies of variables} \usage{ plot_frq( data, ..., title = "", weight.by = NULL, title.wtd.suffix = NULL, sort.frq = c("none", "asc", "desc"), type = c("bar", "dot", "histogram", "line", "density", "boxplot", "violin"), geom.size = NULL, geom.colors = "#336699", errorbar.color = "darkred", axis.title = NULL, axis.labels = NULL, xlim = NULL, ylim = NULL, wrap.title = 50, wrap.labels = 20, grid.breaks = NULL, expand.grid = FALSE, show.values = TRUE, show.n = TRUE, show.prc = TRUE, show.axis.values = TRUE, show.ci = FALSE, show.na = FALSE, show.mean = FALSE, show.mean.val = TRUE, show.sd = TRUE, drop.empty = TRUE, mean.line.type = 2, mean.line.size = 0.5, inner.box.width = 0.15, inner.box.dotsize = 3, normal.curve = FALSE, normal.curve.color = "red", normal.curve.size = 0.8, normal.curve.alpha = 0.4, auto.group = NULL, coord.flip = FALSE, vjust = "bottom", hjust = "center", y.offset = NULL ) } \arguments{ \item{data}{A data frame, or a grouped data frame.} \item{...}{Optional, unquoted names of variables that should be selected for further processing. Required, if \code{data} is a data frame (and no vector) and only selected variables from \code{data} should be processed. You may also use functions like \code{:} or tidyselect's select_helpers.} \item{title}{Character vector, used as plot title. By default, \code{\link[sjlabelled]{response_labels}} is called to retrieve the label of the dependent variable, which will be used as title. Use \code{title = ""} to remove title.} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of same length as the input vector. Default is \code{NULL}, so no weights are used.} \item{title.wtd.suffix}{Suffix (as string) for the title, if \code{weight.by} is specified, e.g. \code{title.wtd.suffix=" (weighted)"}. Default is \code{NULL}, so title will not have a suffix when cases are weighted.} \item{sort.frq}{Determines whether categories should be sorted according to their frequencies or not. Default is \code{"none"}, so categories are not sorted by frequency. Use \code{"asc"} or \code{"desc"} for sorting categories ascending or descending order.} \item{type}{Specifies the plot type. May be abbreviated. \describe{ \item{\code{"bar"}}{for simple bars (default)} \item{\code{"dot"}}{for a dot plot} \item{\code{"histogram"}}{for a histogram (does not apply to grouped frequencies)} \item{\code{"line"}}{for a line-styled histogram with filled area} \item{\code{"density"}}{for a density plot (does not apply to grouped frequencies)} \item{\code{"boxplot"}}{for box plot} \item{\code{"violin"}}{for violin plots} }} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{geom.colors}{User defined color for geoms, e.g. 
\code{geom.colors = "#0080ff"}.} \item{errorbar.color}{Color of confidence interval bars (error bars). Only applies to \code{type = "bar"}. In case of dot plots, error bars will have same colors as dots (see \code{geom.colors}).} \item{axis.title}{Character vector of length one or two (depending on the plot function and type), used as title(s) for the x and y axis. If not specified, a default labelling is chosen. \strong{Note:} Some plot types do not support this argument. In such cases, use the return value and add axis titles manually with \code{\link[ggplot2]{labs}}, e.g.: \code{$plot.list[[1]] + labs(x = ...)}} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{xlim}{Numeric vector of length two, defining lower and upper axis limits of the x scale. By default, this argument is set to \code{NULL}, i.e. the x-axis fits to the required range of the data.} \item{ylim}{numeric vector of length two, defining lower and upper axis limits of the y scale. By default, this argument is set to \code{NULL}, i.e. the y-axis fits to the required range of the data.} \item{wrap.title}{Numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid is being printed.} \item{expand.grid}{logical, if \code{TRUE}, the plot grid is expanded, i.e. there is a small margin between axes and plotting region. Default is \code{FALSE}.} \item{show.values}{Logical, whether values should be plotted or not.} \item{show.n}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} \item{show.prc}{logical, if \code{TRUE} (default), percentage values are plotted to each bar If \code{FALSE}, percentage values are removed.} \item{show.axis.values}{logical, whether category, count or percentage values for the axis should be printed or not.} \item{show.ci}{Logical, if \code{TRUE)}, adds notches to the box plot, which are used to compare groups; if the notches of two boxes do not overlap, medians are considered to be significantly different.} \item{show.na}{logical, if \code{TRUE}, \code{\link{NA}}'s (missing values) are added to the output.} \item{show.mean}{Logical, if \code{TRUE}, a vertical line in histograms is drawn to indicate the mean value of the variables. Only applies to histogram-charts.} \item{show.mean.val}{Logical, if \code{TRUE} (default), the mean value is printed to the vertical line that indicates the variable's mean. Only applies to histogram-charts.} \item{show.sd}{Logical, if \code{TRUE}, the standard deviation is annotated as shaded rectangle around the mean intercept line. Only applies to histogram-charts.} \item{drop.empty}{Logical, if \code{TRUE} and the variable's values are labeled, values that have no observations are still printed in the table (with frequency \code{0}). If \code{FALSE}, values / factor levels with no occurrence in the data are omitted from the output.} \item{mean.line.type}{Numeric value, indicating the linetype of the mean intercept line. Only applies to histogram-charts and when \code{show.mean = TRUE}.} \item{mean.line.size}{Numeric, size of the mean intercept line. 
Only applies to histogram-charts and when \code{show.mean = TRUE}.} \item{inner.box.width}{width of the inner box plot that is plotted inside of violin plots. Only applies if \code{type = "violin"}. Default value is 0.15} \item{inner.box.dotsize}{size of mean dot insie a violin or box plot. Applies only when \code{type = "violin"} or \code{"boxplot"}.} \item{normal.curve}{Logical, if \code{TRUE}, a normal curve, which is adjusted to the data, is plotted over the histogram or density plot. Default is \code{FALSE}. Only applies when histograms or density plots are plotted (see \code{type}).} \item{normal.curve.color}{Color of the normal curve line. Only applies if \code{normal.curve = TRUE}.} \item{normal.curve.size}{Numeric, size of the normal curve line. Only applies if \code{normal.curve = TRUE}.} \item{normal.curve.alpha}{Transparancy level (alpha value) of the normal curve. Only applies if \code{normal.curve = TRUE}.} \item{auto.group}{numeric value, indicating the minimum amount of unique values in the count variable, at which automatic grouping into smaller units is done (see \code{\link[sjmisc]{group_var}}). Default value for \code{auto.group} is \code{NULL}, i.e. auto-grouping is off. See \code{\link[sjmisc]{group_var}} for examples on grouping.} \item{coord.flip}{logical, if \code{TRUE}, the x and y axis are swapped.} \item{vjust}{character vector, indicating the vertical position of value labels. Allowed are same values as for \code{vjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} \item{hjust}{character vector, indicating the horizontal position of value labels. Allowed are same values as for \code{vjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} \item{y.offset}{numeric, offset for text labels when their alignment is adjusted to the top/bottom of the geom (see \code{hjust} and \code{vjust}).} } \value{ A ggplot-object. } \description{ Plot frequencies of a variable as bar graph, histogram, box plot etc. } \note{ This function only works with variables with integer values (or numeric factor levels), i.e. scales / centered variables with fractional part may result in unexpected behaviour. } \examples{ library(sjlabelled) data(efc) data(iris) # simple plots, two different notations plot_frq(iris, Species) plot_frq(efc$tot_sc_e) # boxplot plot_frq(efc$e17age, type = "box") if (require("dplyr")) { # histogram, pipe-workflow efc \%>\% dplyr::select(e17age, c160age) \%>\% plot_frq(type = "hist", show.mean = TRUE) # bar plot(s) plot_frq(efc, e42dep, c172code) } if (require("dplyr") && require("gridExtra")) { # grouped data frame, all panels in one plot efc \%>\% group_by(e42dep) \%>\% plot_frq(c161sex) \%>\% plot_grid() } \donttest{ library(sjmisc) # grouped variable ageGrp <- group_var(efc$e17age) ageGrpLab <- group_labels(efc$e17age) plot_frq(ageGrp, title = get_label(efc$e17age), axis.labels = ageGrpLab) # plotting confidence intervals. 
expand grid and v/hjust for text labels plot_frq( efc$e15relat, type = "dot", show.ci = TRUE, sort.frq = "desc", coord.flip = TRUE, expand.grid = TRUE, vjust = "bottom", hjust = "left" ) # histogram with overlayed normal curve plot_frq(efc$c160age, type = "h", show.mean = TRUE, show.mean.val = TRUE, normal.curve = TRUE, show.sd = TRUE, normal.curve.color = "blue", normal.curve.size = 3, ylim = c(0,50)) } } sjPlot/man/tab_df.Rd0000644000176200001440000001277114051650703014017 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/html_print.R \name{tab_df} \alias{tab_df} \alias{tab_dfs} \title{Print data frames as HTML table.} \usage{ tab_df( x, title = NULL, footnote = NULL, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = FALSE, sort.column = NULL, digits = 2, encoding = "UTF-8", CSS = NULL, file = NULL, use.viewer = TRUE, ... ) tab_dfs( x, titles = NULL, footnotes = NULL, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = FALSE, sort.column = NULL, digits = 2, encoding = "UTF-8", CSS = NULL, file = NULL, use.viewer = TRUE, ... ) } \arguments{ \item{x}{For \code{tab_df()}, a data frame; and for \code{tab_dfs()}, a list of data frames.} \item{title, titles, footnote, footnotes}{Character vector with table caption(s) resp. footnote(s). For \code{tab_df()}, must be a character of length 1; for \code{tab_dfs()}, a character vector of same length as \code{x} (i.e. one title or footnote per data frame).} \item{col.header}{Character vector with elements used as column header for the table. If \code{NULL}, column names from \code{x} are used as column header.} \item{show.type}{Logical, if \code{TRUE}, adds information about the variable type to the variable column.} \item{show.rownames}{Logical, if \code{TRUE}, adds a column with the data frame's rowname to the table output.} \item{show.footnote}{Logical, if \code{TRUE},adds a summary footnote below the table. For \code{tab_df()}, specify the string in \code{footnote}, for \code{tab_dfs()} provide a character vector in \code{footnotes}.} \item{alternate.rows}{Logical, if \code{TRUE}, rows are printed in alternatig colors (white and light grey by default).} \item{sort.column}{Numeric vector, indicating the index of the column that should sorted. by default, the column is sorted in ascending order. Use negative index for descending order, for instance, \code{sort.column = -3} would sort the third column in descending order. Note that the first column with rownames is not counted.} \item{digits}{Numeric, amount of digits after decimal point when rounding values.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. 
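One more hedged sketch for \code{plot_frq()} documented above (not from its original examples): automatic grouping of a continuous variable via \code{auto.group}, assuming the efc data.

library(sjmisc)
data(efc)
# group age into smaller units once it has 5 or more unique values
plot_frq(efc$e17age, type = "bar", auto.group = 5)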
If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{...}{Currently not used.} } \value{ A list with following items: \itemize{ \item the web page style sheet (\code{page.style}), \item the HTML content of the data frame (\code{page.content}), \item the complete HTML page, including header, style sheet and body (\code{page.complete}) \item the HTML table with inline-css for use with knitr (\code{knitr}) \item the file path, if the HTML page should be saved to disk (\code{file}) } } \description{ These functions print data frames as HTML-table, showing the results in RStudio's viewer pane or in a web browser. } \details{ \strong{How do I use \code{CSS}-argument?} \cr \cr With the \code{CSS}-argument, the visual appearance of the tables can be modified. To get an overview of all style-sheet-classnames that are used in this function, see return value \code{page.style} for details. Arguments for this list have following syntax: \enumerate{ \item the class-name as argument name and \item each style-definition must end with a semicolon } You can add style information to the default styles by using a + (plus-sign) as initial character for the argument attributes. Examples: \itemize{ \item \code{table = 'border:2px solid red;'} for a solid 2-pixel table border in red. \item \code{summary = 'font-weight:bold;'} for a bold fontweight in the summary row. \item \code{lasttablerow = 'border-bottom: 1px dotted blue;'} for a blue dotted border of the last table row. \item \code{colnames = '+color:green'} to add green color formatting to column names. \item \code{arc = 'color:blue;'} for a blue text color each 2nd row. \item \code{caption = '+color:red;'} to add red font-color to the default table caption style. } See further examples in \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}. } \note{ The HTML tables can either be saved as file and manually opened (use argument \code{file}) or they can be saved as temporary files and will be displayed in the RStudio Viewer pane (if working with RStudio) or opened with the default web browser. Displaying resp. opening a temporary file is the default behaviour. } \examples{ \dontrun{ data(iris) data(mtcars) tab_df(iris[1:5, ]) tab_dfs(list(iris[1:5, ], mtcars[1:5, 1:5])) # sort 2nd column ascending tab_df(iris[1:5, ], sort.column = 2) # sort 2nd column descending tab_df(iris[1:5, ], sort.column = -2)} } sjPlot/man/sjPlot-package.Rd0000644000176200001440000000377013733137536015455 0ustar liggesusers\encoding{UTF-8} \name{sjPlot-package} \alias{sjPlot-package} \alias{sjPlot} \docType{package} \title{Data Visualization for Statistics in Social Science} \description{ Collection of plotting and table output functions for data visualization. Results of various statistical analyses (that are commonly used in social sciences) can be visualized using this package, including simple and cross tabulated frequencies, histograms, box plots, (generalized) linear models, mixed effects models, PCA and correlation matrices, cluster analyses, scatter plots, Likert scales, effects plots of interaction terms in regression models, constructing index or score variables and much more. The package supports labelled data, i.e. value and variable labels from labelled data (like vectors or data frames) are automatically used to label the output. Own labels can be specified as well. 
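A short hedged sketch of the \code{CSS}-argument for \code{tab_df()}, using the style-sheet element names shown in the 'Details' section above (the concrete style values are only examples):

data(iris)
tab_df(iris[1:5, ],
       title = "First five rows of iris",
       CSS = list(
         table = "border: 2px solid grey;",   # table border
         caption = "+color: red;"             # added to the default caption style
       ))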
\emph{What does this package do?} In short, the functions in this package mostly do two things: \enumerate{ \item compute basic or advanced statistical analyses \item either plot the results as ggplot-figure or print them as html-table } \emph{How does this package help me?} One of the more challenging tasks when working with R is to get nicely formatted output of statistical analyses, either in graphical or table format. The sjPlot-package takes over these tasks and makes it easy to create beautiful figures or tables. There are many examples for each function in the related help files and a comprehensive online documentation at \url{https://strengejacke.github.io/sjPlot/}. \emph{A note on the package functions} The main functions follow specific naming conventions, hence starting with a specific prefix, which indicates what kind of task these functions perform. \itemize{ \item \code{sjc} - cluster analysis functions \item \code{sjp} - plotting functions \item \code{sjt} - (HTML) table output functions } } \author{ Daniel Lüdecke \email{d.luedecke@uke.de} } sjPlot/man/tab_itemscale.Rd0000644000176200001440000002405514051650703015372 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_itemscale.R \name{tab_itemscale} \alias{tab_itemscale} \alias{sjt.itemanalysis} \title{Summary of item analysis of an item scale as HTML table} \usage{ tab_itemscale( df, factor.groups = NULL, factor.groups.titles = "auto", scale = FALSE, min.valid.rowmean = 2, alternate.rows = TRUE, sort.column = NULL, show.shapiro = FALSE, show.kurtosis = FALSE, show.corr.matrix = TRUE, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE ) sjt.itemanalysis( df, factor.groups = NULL, factor.groups.titles = "auto", scale = FALSE, min.valid.rowmean = 2, alternate.rows = TRUE, sort.column = NULL, show.shapiro = FALSE, show.kurtosis = FALSE, show.corr.matrix = TRUE, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE ) } \arguments{ \item{df}{A data frame with items.} \item{factor.groups}{If not \code{NULL}, \code{df} will be splitted into sub-groups, where the item analysis is carried out for each of these groups. Must be a vector of same length as \code{ncol(df)}, where each item in this vector represents the group number of the related columns of \code{df}. If \code{factor.groups = "auto"}, a principal component analysis with Varimax rotation is performed, and the resulting groups for the components are used as group index. See 'Examples'.} \item{factor.groups.titles}{Titles for each factor group that will be used as table caption for each component-table. Must be a character vector of same length as \code{length(unique(factor.groups))}. Default is \code{"auto"}, which means that each table has a standard caption \emph{Component x}. Use \code{NULL} to suppress table captions.} \item{scale}{Logical, if \code{TRUE}, the data frame's vectors will be scaled when calculating the Cronbach's Alpha value (see \code{\link[performance]{item_reliability}}). Recommended, when the variables have different measures / scales.} \item{min.valid.rowmean}{Minimum amount of valid values to compute row means for index scores. Default is 2, i.e. the return values \code{index.scores} and \code{df.index.scores} are computed for those items that have at least \code{min.valid.rowmean} per case (observation, or technically, row). 
See \code{mean_n} for details.} \item{alternate.rows}{Logical, if \code{TRUE}, rows are printed in alternatig colors (white and light grey by default).} \item{sort.column}{Numeric vector, indicating the index of the column that should sorted. by default, the column is sorted in ascending order. Use negative index for descending order, for instance, \code{sort.column = -3} would sort the third column in descending order. Note that the first column with rownames is not counted.} \item{show.shapiro}{Logical, if \code{TRUE}, a Shapiro-Wilk normality test is computed for each item. See \code{\link{shapiro.test}} for details.} \item{show.kurtosis}{Logical, if \code{TRUE}, the kurtosis for each item will also be shown (see \code{\link[psych]{kurtosi}} and \code{\link[psych]{describe}} in the \code{psych}-package for more details.} \item{show.corr.matrix}{Logical, if \code{TRUE} (default), a correlation matrix of each component's index score is shown. Only applies if \code{factor.groups} is not \code{NULL} and \code{df} has more than one group. First, for each case (df's row), the sum of all variables (df's columns) is scaled (using the \code{\link{scale}}-function) and represents a "total score" for each component (a component is represented by each group of \code{factor.groups}). After that, each case (df's row) has a scales sum score for each component. Finally, a correlation of these "scale sum scores" is computed.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parantheses for html-tags. The html-source may look less pretty, but it may help when exporting html-tables to office tools.} } \value{ Invisibly returns \itemize{ \item \code{df.list}: List of data frames with the item analysis for each sub.group (or complete, if \code{factor.groups} was \code{NULL}) \item \code{index.scores}: A data frame with of standardized scale / index scores for each case (mean value of all scale items for each case) for each sub-group. \item \code{ideal.item.diff}: List of vectors that indicate the ideal item difficulty for each item in each sub-group. Item difficulty only differs when items have different levels. \item \code{cronbach.values}: List of Cronbach's Alpha values for the overall item scale for each sub-group. \item \code{knitr.list}: List of html-tables with inline-css for use with knitr for each table (sub-group) \item \code{knitr}: html-table of all complete output with inline-css for use with knitr \item \code{complete.page}: Complete html-output. 
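A hedged sketch of how the invisibly returned values listed above can be accessed (element names as documented; not part of the original examples):

library(sjmisc)
data(efc)
start <- which(colnames(efc) == "c82cop1")
end <- which(colnames(efc) == "c90cop9")
if (interactive()) {
  res <- tab_itemscale(efc[, start:end], factor.groups = "auto")
  res$df.list           # item analysis tables per sub-group
  res$cronbach.values   # Cronbach's Alpha per sub-group
}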
} If \code{factor.groups = NULL}, each list contains only one elment, since just one table is printed for the complete scale indicated by \code{df}. If \code{factor.groups} is a vector of group-index-values, the lists contain elements for each sub-group. } \description{ This function performs an item analysis with certain statistics that are useful for scale or index development. The resulting tables are shown in the viewer pane resp. webbrowser or can be saved as file. Following statistics are computed for each item of a data frame: \itemize{ \item percentage of missing values \item mean value \item standard deviation \item skew \item item difficulty \item item discrimination \item Cronbach's Alpha if item was removed from scale \item mean (or average) inter-item-correlation } Optional, following statistics can be computed as well: \itemize{ \item kurstosis \item Shapiro-Wilk Normality Test } If \code{factor.groups} is not \code{NULL}, the data frame \code{df} will be splitted into groups, assuming that \code{factor.groups} indicate those columns of the data frame that belong to a certain factor (see return value of function \code{\link{tab_pca}} as example for retrieving factor groups for a scale and see examples for more details). } \note{ \itemize{ \item The \emph{Shapiro-Wilk Normality Test} (see column \code{W(p)}) tests if an item has a distribution that is significantly different from normal. \item \emph{Item difficulty} should range between 0.2 and 0.8. Ideal value is \code{p+(1-p)/2} (which mostly is between 0.5 and 0.8). \item For \emph{item discrimination}, acceptable values are 0.20 or higher; the closer to 1.00 the better. See \code{\link[performance]{item_reliability}} for more details. \item In case the total \emph{Cronbach's Alpha} value is below the acceptable cut-off of 0.7 (mostly if an index has few items), the \emph{mean inter-item-correlation} is an alternative measure to indicate acceptability. Satisfactory range lies between 0.2 and 0.4. See also \code{\link[performance]{item_intercor}}. } } \examples{ # Data from the EUROFAMCARE sample dataset library(sjmisc) library(sjlabelled) data(efc) # retrieve variable and value labels varlabs <- get_label(efc) # recveive first item of COPE-index scale start <- which(colnames(efc) == "c82cop1") # recveive last item of COPE-index scale end <- which(colnames(efc) == "c90cop9") # create data frame with COPE-index scale mydf <- data.frame(efc[, start:end]) colnames(mydf) <- varlabs[start:end] \dontrun{ if (interactive()) { tab_itemscale(mydf) # auto-detection of labels tab_itemscale(efc[, start:end]) # Compute PCA on Cope-Index, and perform a # item analysis for each extracted factor. indices <- tab_pca(mydf)$factor.index tab_itemscale(mydf, factor.groups = indices) # or, equivalent tab_itemscale(mydf, factor.groups = "auto") }} } \references{ \itemize{ \item Jorion N, Self B, James K, Schroeder L, DiBello L, Pellegrino J (2013) Classical Test Theory Analysis of the Dynamics Concept Inventory. (\href{https://www.academia.edu/4104752/Classical_Test_Theory_Analysis_of_the_Dynamics_Concept_Inventory}{web}) \item Briggs SR, Cheek JM (1986) The role of factor analysis in the development and evaluation of personality scales. Journal of Personality, 54(1), 106-148. doi: 10.1111/j.1467-6494.1986.tb00391.x \item McLean S et al. (2013) Stigmatizing attitudes and beliefs about bulimia nervosa: Gender, age, education and income variability in a community sample. International Journal of Eating Disorders. 
doi: 10.1002/eat.22227 \item Trochim WMK (2008) Types of Reliability. (\href{https://conjointly.com/kb/types-of-reliability/}{web}) } } sjPlot/man/tab_stackfrq.Rd0000644000176200001440000001607014051650703015240 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_stackfrq.R \name{tab_stackfrq} \alias{tab_stackfrq} \title{Summary of stacked frequencies as HTML table} \usage{ tab_stackfrq( items, weight.by = NULL, title = NULL, var.labels = NULL, value.labels = NULL, wrap.labels = 20, sort.frq = NULL, alternate.rows = FALSE, digits = 2, string.total = "N", string.na = "NA", show.n = FALSE, show.total = FALSE, show.na = FALSE, show.skew = FALSE, show.kurtosis = FALSE, digits.stats = 2, file = NULL, encoding = NULL, CSS = NULL, use.viewer = TRUE, remove.spaces = TRUE ) } \arguments{ \item{items}{Data frame, or a grouped data frame, with each column representing one item.} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of same length as the input vector. Default is \code{NULL}, so no weights are used.} \item{title}{Character vector with table caption(s) resp. footnote(s). For \code{tab_df()}, must be a character of length 1; for \code{tab_dfs()}, a character vector of same length as \code{x} (i.e. one title or footnote per data frame).} \item{var.labels}{Character vector with variable names, which will be used to label variables in the output.} \item{value.labels}{Character vector (or \code{list} of character vectors) with value labels of the supplied variables, which will be used to label variable values in the output.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{sort.frq}{logical, indicates whether the \code{items} should be ordered by by highest count of first or last category of \code{items}. \itemize{ \item Use \code{"first.asc"} to order ascending by lowest count of first category, \item \code{"first.desc"} to order descending by lowest count of first category, \item \code{"last.asc"} to order ascending by lowest count of last category, \item \code{"last.desc"} to order descending by lowest count of last category, \item or \code{NULL} (default) for no sorting. }} \item{alternate.rows}{Logical, if \code{TRUE}, rows are printed in alternatig colors (white and light grey by default).} \item{digits}{Numeric, amount of digits after decimal point when rounding values.} \item{string.total}{label for the total N column.} \item{string.na}{label for the missing column/row.} \item{show.n}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} \item{show.total}{logical, if \code{TRUE}, an additional column with each item's total N is printed.} \item{show.na}{logical, if \code{TRUE}, \code{\link{NA}}'s (missing values) are added to the output.} \item{show.skew}{logical, if \code{TRUE}, an additional column with each item's skewness is printed. The skewness is retrieved from the \code{\link[psych]{describe}}-function of the \pkg{psych}-package.} \item{show.kurtosis}{Logical, if \code{TRUE}, the kurtosis for each item will also be shown (see \code{\link[psych]{kurtosi}} and \code{\link[psych]{describe}} in the \code{psych}-package for more details.} \item{digits.stats}{amount of digits for rounding the skewness and kurtosis valuess. Default is 2, i.e. 
skewness and kurtosis values have 2 digits after decimal point.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parantheses for html-tags. The html-source may look less pretty, but it may help when exporting html-tables to office tools.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}) and \item the html-table with inline-css for use with knitr (\code{knitr}) } for further use. } \description{ Shows the results of stacked frequencies (such as likert scales) as HTML table. This function is useful when several items with identical scale/categories should be printed as table to compare their distributions (e.g. when plotting scales like SF, Barthel-Index, Quality-of-Life-scales etc.). 
} \examples{ # ------------------------------- # random sample # ------------------------------- # prepare data for 4-category likert scale, 5 items likert_4 <- data.frame( as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.2, 0.3, 0.1, 0.4))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.5, 0.25, 0.15, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.25, 0.1, 0.4, 0.25))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.1, 0.4, 0.4, 0.1))), as.factor(sample(1:4, 500, replace = TRUE, prob = c(0.35, 0.25, 0.15, 0.25))) ) # create labels levels_4 <- c("Independent", "Slightly dependent", "Dependent", "Severely dependent") # create item labels items <- c("Q1", "Q2", "Q3", "Q4", "Q5") # plot stacked frequencies of 5 (ordered) item-scales \dontrun{ if (interactive()) { tab_stackfrq(likert_4, value.labels = levels_4, var.labels = items) # ------------------------------- # Data from the EUROFAMCARE sample dataset # Auto-detection of labels # ------------------------------- data(efc) # retrieve first item of COPE-index scale start <- which(colnames(efc) == "c82cop1") # retrieve last item of COPE-index scale end <- which(colnames(efc) == "c90cop9") tab_stackfrq(efc[, c(start:end)], alternate.rows = TRUE) tab_stackfrq(efc[, c(start:end)], alternate.rows = TRUE, show.n = TRUE, show.na = TRUE) # -------------------------------- # User defined style sheet # -------------------------------- tab_stackfrq(efc[, c(start:end)], alternate.rows = TRUE, show.total = TRUE, show.skew = TRUE, show.kurtosis = TRUE, CSS = list(css.ncol = "border-left:1px dotted black;", css.summary = "font-style:italic;")) } } } sjPlot/man/plot_grpfrq.Rd0000644000176200001440000002651013776334350015135 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_grpfrq.R \name{plot_grpfrq} \alias{plot_grpfrq} \title{Plot grouped or stacked frequencies} \usage{ plot_grpfrq( var.cnt, var.grp, type = c("bar", "dot", "line", "boxplot", "violin"), bar.pos = c("dodge", "stack"), weight.by = NULL, intr.var = NULL, title = "", title.wtd.suffix = NULL, legend.title = NULL, axis.titles = NULL, axis.labels = NULL, legend.labels = NULL, intr.var.labels = NULL, wrap.title = 50, wrap.labels = 15, wrap.legend.title = 20, wrap.legend.labels = 20, geom.size = NULL, geom.spacing = 0.15, geom.colors = "Paired", show.values = TRUE, show.n = TRUE, show.prc = TRUE, show.axis.values = TRUE, show.ci = FALSE, show.grpcnt = FALSE, show.legend = TRUE, show.na = FALSE, show.summary = FALSE, drop.empty = TRUE, auto.group = NULL, ylim = NULL, grid.breaks = NULL, expand.grid = FALSE, inner.box.width = 0.15, inner.box.dotsize = 3, smooth.lines = FALSE, emph.dots = TRUE, summary.pos = "r", facet.grid = FALSE, coord.flip = FALSE, y.offset = NULL, vjust = "bottom", hjust = "center" ) } \arguments{ \item{var.cnt}{Vector of counts, for which frequencies or means will be plotted or printed.} \item{var.grp}{Factor with the cross-classifying variable, where \code{var.cnt} is grouped into the categories represented by \code{var.grp}.} \item{type}{Specifies the plot type. May be abbreviated.
\describe{ \item{\code{"bar"}}{for simple bars (default)} \item{\code{"dot"}}{for a dot plot} \item{\code{"histogram"}}{for a histogram (does not apply to grouped frequencies)} \item{\code{"line"}}{for a line-styled histogram with filled area} \item{\code{"density"}}{for a density plot (does not apply to grouped frequencies)} \item{\code{"boxplot"}}{for box plot} \item{\code{"violin"}}{for violin plots} }} \item{bar.pos}{Indicates whether bars should be positioned side-by-side (default), or stacked (\code{bar.pos = "stack"}). May be abbreviated.} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of same length as the input vector. Default is \code{NULL}, so no weights are used.} \item{intr.var}{An interaction variable which can be used for box plots. Divides each category indicated by \code{var.grp} into the factors of \code{intr.var}, so that each category of \code{var.grp} is subgrouped into \code{intr.var}'s categories. Only applies when \code{type = "boxplot"} or \code{type = "violin"}.} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{title.wtd.suffix}{Suffix (as string) for the title, if \code{weight.by} is specified, e.g. \code{title.wtd.suffix=" (weighted)"}. Default is \code{NULL}, so title will not have a suffix when cases are weighted.} \item{legend.title}{character vector, used as title for the plot legend.} \item{axis.titles}{character vector of length one or two, defining the title(s) for the x-axis and y-axis.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{legend.labels}{character vector with labels for the guide/legend.} \item{intr.var.labels}{a character vector with labels for the x-axis breaks when having interaction variables included. These labels replace the \code{axis.labels}. Only applies, when using box or violin plots (i.e. \code{type = "boxplot"} or \code{"violin"}) and \code{intr.var} is not \code{NULL}.} \item{wrap.title}{numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{wrap.legend.labels}{numeric, determines how many chars of the legend labels are displayed in one line and when a line break is inserted.} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{geom.spacing}{the spacing between geoms (i.e. bar spacing)} \item{geom.colors}{user defined color for geoms. 
See 'Details' in \code{\link{plot_grpfrq}}.} \item{show.values}{Logical, whether values should be plotted or not.} \item{show.n}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} \item{show.prc}{logical, if \code{TRUE} (default), percentage values are plotted to each bar. If \code{FALSE}, percentage values are removed.} \item{show.axis.values}{logical, whether category, count or percentage values for the axis should be printed or not.} \item{show.ci}{Logical, if \code{TRUE}, adds notches to the box plot, which are used to compare groups; if the notches of two boxes do not overlap, medians are considered to be significantly different.} \item{show.grpcnt}{logical, if \code{TRUE}, the count within each group is added to the category labels (e.g. \code{"Cat 1 (n=87)"}). Default value is \code{FALSE}.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{show.na}{logical, if \code{TRUE}, \code{\link{NA}}'s (missing values) are added to the output.} \item{show.summary}{logical, if \code{TRUE} (default), a summary with chi-squared statistics (see \code{\link{chisq.test}}), Cramer's V or Phi-value etc. is shown. If a cell contains expected values lower than five (or lower than 10 if df is 1), the Fisher's exact test (see \code{\link{fisher.test}}) is computed instead of the chi-squared test. If the table's matrix is larger than 2x2, Fisher's exact test with Monte Carlo simulation is computed.} \item{drop.empty}{Logical, if \code{TRUE} and the variable's values are labeled, values / factor levels with no occurrence in the data are omitted from the output. If \code{FALSE}, labeled values that have no observations are still printed in the table (with frequency \code{0}).} \item{auto.group}{numeric value, indicating the minimum amount of unique values in the count variable, at which automatic grouping into smaller units is done (see \code{\link[sjmisc]{group_var}}). Default value for \code{auto.group} is \code{NULL}, i.e. auto-grouping is off. See \code{\link[sjmisc]{group_var}} for examples on grouping.} \item{ylim}{numeric vector of length two, defining lower and upper axis limits of the y scale. By default, this argument is set to \code{NULL}, i.e. the y-axis fits to the required range of the data.} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid is being printed.} \item{expand.grid}{logical, if \code{TRUE}, the plot grid is expanded, i.e. there is a small margin between axes and plotting region. Default is \code{FALSE}.} \item{inner.box.width}{width of the inner box plot that is plotted inside of violin plots. Only applies if \code{type = "violin"}. Default value is 0.15.} \item{inner.box.dotsize}{size of mean dot inside a violin or box plot. Applies only when \code{type = "violin"} or \code{"boxplot"}.} \item{smooth.lines}{prints a smooth line curve. Only applies, when argument \code{type = "line"}.} \item{emph.dots}{logical, if \code{TRUE}, the groups of dots in a dot-plot are highlighted with a shaded rectangle.} \item{summary.pos}{position of the model summary which is printed when \code{show.summary} is \code{TRUE}. Default is \code{"r"}, i.e. it's printed to the upper right corner. Use \code{"l"} for upper left corner.} \item{facet.grid}{\code{TRUE} to arrange the layout of multiple plots in a grid of an integrated single plot.
This argument calls \code{\link[ggplot2]{facet_wrap}} or \code{\link[ggplot2]{facet_grid}} to arrange plots. Use \code{\link{plot_grid}} to plot multiple plot-objects as an arranged grid with \code{\link[gridExtra]{grid.arrange}}.} \item{coord.flip}{logical, if \code{TRUE}, the x and y axis are swapped.} \item{y.offset}{numeric, offset for text labels when their alignment is adjusted to the top/bottom of the geom (see \code{hjust} and \code{vjust}).} \item{vjust}{character vector, indicating the vertical position of value labels. Allowed are the same values as for \code{vjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} \item{hjust}{character vector, indicating the horizontal position of value labels. Allowed are the same values as for \code{hjust} aesthetics from \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and new options like "inward" and "outward", which align text towards and away from the center of the plot respectively.} } \value{ A ggplot-object. } \description{ Plot grouped or stacked frequencies of variables as bar/dot, box or violin plots, or line plot. } \details{ \code{geom.colors} may be a character vector of color values in hex-format, valid color value names (see \code{demo("colors")}) or a name of a \href{https://colorbrewer2.org/}{color brewer} palette. The following options are valid for the \code{geom.colors} argument: \itemize{ \item If not specified, a default color brewer palette will be used, which is suitable for the plot style (i.e. diverging for likert scales, qualitative for grouped bars etc.). \item If \code{"gs"}, a greyscale will be used. \item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). \item If \code{geom.colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. \item Else specify own color values or names as vector (e.g. \code{geom.colors = c("#f00000", "#00ff00")}). } } \examples{ data(efc) plot_grpfrq(efc$e17age, efc$e16sex, show.values = FALSE) # boxplot plot_grpfrq(efc$e17age, efc$e42dep, type = "box") # grouped bars plot_grpfrq(efc$e42dep, efc$e16sex, title = NULL) # box plots with interaction variable plot_grpfrq(efc$e17age, efc$e42dep, intr.var = efc$e16sex, type = "box") # Grouped bar plot plot_grpfrq(efc$neg_c_7, efc$e42dep, show.values = FALSE) # same data as line plot plot_grpfrq(efc$neg_c_7, efc$e42dep, type = "line") # show only categories where we have data (i.e. drop zero-counts) library(dplyr) efc <- dplyr::filter(efc, e42dep \%in\% c(3,4)) plot_grpfrq(efc$c161sex, efc$e42dep, drop.empty = TRUE) # show all categories, even if not in data plot_grpfrq(efc$c161sex, efc$e42dep, drop.empty = FALSE) }
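Editor's note: the 'Details' section of plot_grpfrq() above describes several geom.colors options that the shipped examples do not demonstrate. The short R sketch below is an editorial illustration only (not part of the package's own example code); it assumes the efc sample data used throughout these examples.

# illustrative sketch only -- custom colors for plot_grpfrq()
library(sjPlot)
data(efc)
# use a Color Brewer palette name for the grouping colors ...
plot_grpfrq(efc$e42dep, efc$e16sex, geom.colors = "Set2")
# ... or supply one color per category of var.grp (e16sex has two groups)
plot_grpfrq(efc$e42dep, efc$e16sex, geom.colors = c("#3366cc", "#cc3333"))
# greyscale, e.g. for print figures
plot_grpfrq(efc$e42dep, efc$e16sex, geom.colors = "gs")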
sjPlot/man/dist_f.Rd0000644000176200001440000000417013567425621014052 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotDist.R \name{dist_f} \alias{dist_f} \title{Plot F distributions} \usage{ dist_f( f = NULL, deg.f1 = NULL, deg.f2 = NULL, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7 ) } \arguments{ \item{f}{Numeric, optional. If specified, an F distribution with \code{deg.f1} and \code{deg.f2} degrees of freedom is plotted and a shaded area at the \code{f} value position is plotted that indicates whether the specified value is significant or not. If both \code{f} and \code{p} are not specified, a distribution without shaded area is plotted.} \item{deg.f1}{Numeric. The first degrees of freedom for the F distribution. Needs to be specified.} \item{deg.f2}{Numeric. The second degrees of freedom for the F distribution. Needs to be specified.} \item{p}{Numeric, optional. If specified, an F distribution with \code{deg.f1} and \code{deg.f2} degrees of freedom is plotted and a shaded area at the position where the specified p-level starts is plotted. If both \code{f} and \code{p} are not specified, a distribution without shaded area is plotted.} \item{xmax}{Numeric, optional. Specifies the maximum x-axis-value. If not specified, the x-axis ranges to a value where a p-level of 0.00001 is reached.} \item{geom.colors}{user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{geom.alpha}{Specifies the alpha-level of the shaded area. Default is 0.7, range is between 0 and 1.} } \description{ This function plots a simple F distribution or an F distribution with shaded areas that indicate at which F value a significant p-level is reached. } \examples{ # a simple F distribution for 6 and 45 degrees of freedom dist_f(deg.f1 = 6, deg.f2 = 45) # F distribution for 6 and 45 degrees of freedom, # and a shaded area starting at F value of two. # F-values equal or greater than 2.31 are "significant" dist_f(f = 2, deg.f1 = 6, deg.f2 = 45) # F distribution for 6 and 45 degrees of freedom, # and a shaded area starting at a p-level of 0.2 # (F-Value about 1.5). dist_f(p = 0.2, deg.f1 = 6, deg.f2 = 45) }
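Editor's note: as a companion to the dist_f() examples above, the cutoff behind the shaded area can be reproduced with base R's F-distribution functions. This is an illustrative sketch only, not part of the package.

# critical F value for alpha = 0.05 with 6 and 45 degrees of freedom;
# this is the ~2.31 cutoff mentioned in the dist_f() example comments
qf(0.95, df1 = 6, df2 = 45)
# one-sided p-value of an observed F = 2 with the same degrees of freedom
# (larger than 0.05, so F = 2 falls short of the shaded significance region)
pf(2, df1 = 6, df2 = 45, lower.tail = FALSE)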
sjPlot/man/figures/logo.png [binary PNG image data (package logo) omitted]
sjPlot/man/set_theme.Rd0000644000176200001440000002243413567425621014560 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotSetTheme.R \name{set_theme} \alias{set_theme} \title{Set global theme options for sjp-functions} \usage{ set_theme( base = theme_grey(), theme.font = NULL, title.color = "black", title.size = 1.2, title.align = "left", title.vjust = NULL, geom.outline.color = NULL, geom.outline.size = 0, geom.boxoutline.size = 0.5, geom.boxoutline.color = "black", geom.alpha = 1, geom.linetype = 1, geom.errorbar.size = 0.7, geom.errorbar.linetype = 1, geom.label.color = NULL, geom.label.size = 4, geom.label.alpha = 1, geom.label.angle = 0, axis.title.color = "grey30", axis.title.size = 1.1, axis.title.x.vjust = NULL, axis.title.y.vjust = NULL, axis.angle.x = 0, axis.angle.y = 0, axis.angle = NULL, axis.textcolor.x = "grey30", axis.textcolor.y = "grey30", axis.textcolor = NULL, axis.linecolor.x = NULL, axis.linecolor.y = NULL, axis.linecolor = NULL, axis.line.size = 0.5, axis.textsize.x = 1, axis.textsize.y = 1, axis.textsize = NULL, axis.tickslen = NULL, axis.tickscol = NULL, axis.ticksmar = NULL, axis.ticksize.x = NULL, axis.ticksize.y = NULL, panel.backcol = NULL, panel.bordercol = NULL, panel.col = NULL, panel.major.gridcol = NULL, panel.minor.gridcol = NULL, panel.gridcol = NULL, panel.gridcol.x = NULL, panel.gridcol.y = NULL, panel.major.linetype = 1, panel.minor.linetype = 1, plot.backcol = NULL, plot.bordercol = NULL, plot.col = NULL, plot.margins = NULL, legend.pos = "right", legend.just = NULL, legend.inside = FALSE, legend.size = 1, legend.color = "black", legend.title.size = 1, legend.title.color = "black", legend.title.face = "bold", legend.backgroundcol = "white", legend.bordercol = "white", legend.item.size = NULL, legend.item.backcol = "grey90", legend.item.bordercol = "white" ) } \arguments{ \item{base}{base theme on which the theme is built. By default, all metrics from \code{theme_gray()} are used. See 'Details'.} \item{theme.font}{base font family for the plot.} \item{title.color}{Color of plot title. Default is \code{"black"}.} \item{title.size}{size of plot title. Default is 1.3.} \item{title.align}{alignment of plot title. Must be one of \code{"left"} (default), \code{"center"} or \code{"right"}. You may use initial letter only.} \item{title.vjust}{numeric, vertical adjustment for plot title.} \item{geom.outline.color}{Color of geom outline. Only applies, if \code{geom.outline.size} is larger than 0.} \item{geom.outline.size}{size of bar outlines. Default is 0.1. Use size of \code{0} to remove geom outline.} \item{geom.boxoutline.size}{size of outlines and median bar especially for boxplots. Default is 0.5. Use size of \code{0} to remove boxplot outline.} \item{geom.boxoutline.color}{Color of outlines and median bar especially for boxplots. Only applies, if \code{geom.boxoutline.size} is larger than 0.} \item{geom.alpha}{specifies the transparency (alpha value) of geoms} \item{geom.linetype}{linetype of line geoms. Default is \code{1} (solid line).} \item{geom.errorbar.size}{size (thickness) of error bars. Default is \code{0.8}.} \item{geom.errorbar.linetype}{linetype of error bars.
Default is \code{1} (solid line).} \item{geom.label.color}{Color of geom's value and annotation labels} \item{geom.label.size}{size of geom's value and annotation labels} \item{geom.label.alpha}{alpha level of geom's value and annotation labels} \item{geom.label.angle}{angle of geom's value and annotation labels} \item{axis.title.color}{Color of x- and y-axis title labels} \item{axis.title.size}{size of x- and y-axis title labels} \item{axis.title.x.vjust}{numeric, vertical adjustment of x-axis-title.} \item{axis.title.y.vjust}{numeric, vertical adjustment of y-axis-title.} \item{axis.angle.x}{angle for x-axis labels} \item{axis.angle.y}{angle for y-axis labels} \item{axis.angle}{angle for x- and y-axis labels. If set, overrides both \code{axis.angle.x} and \code{axis.angle.y}} \item{axis.textcolor.x}{Color for x-axis labels. If not specified, a default dark gray color palette will be used for the labels.} \item{axis.textcolor.y}{Color for y-axis labels. If not specified, a default dark gray color palette will be used for the labels.} \item{axis.textcolor}{Color for both x- and y-axis labels. If set, overrides both \code{axis.textcolor.x} and \code{axis.textcolor.y}} \item{axis.linecolor.x}{Color of x-axis border} \item{axis.linecolor.y}{Color of y-axis border} \item{axis.linecolor}{Color for both x- and y-axis borders. If set, overrides both \code{axis.linecolor.x} and \code{axis.linecolor.y}.} \item{axis.line.size}{size (thickness) of axis lines. Only affected, if \code{axis.linecolor} is set.} \item{axis.textsize.x}{size of x-axis labels} \item{axis.textsize.y}{size of y-axis labels} \item{axis.textsize}{size for both x- and y-axis labels. If set, overrides both \code{axis.textsize.x} and \code{axis.textsize.y}.} \item{axis.tickslen}{length of axis tick marks} \item{axis.tickscol}{Color of axis tick marks} \item{axis.ticksmar}{margin between axis labels and tick marks} \item{axis.ticksize.x}{size of tick marks at x-axis.} \item{axis.ticksize.y}{size of tick marks at y-axis.} \item{panel.backcol}{Color of the diagram's background} \item{panel.bordercol}{Color of whole diagram border (panel border)} \item{panel.col}{Color of both diagram's border and background. If set, overrides both \code{panel.bordercol} and \code{panel.backcol}.} \item{panel.major.gridcol}{Color of the major grid lines of the diagram background} \item{panel.minor.gridcol}{Color of the minor grid lines of the diagram background} \item{panel.gridcol}{Color for both minor and major grid lines of the diagram background. If set, overrides both \code{panel.major.gridcol} and \code{panel.minor.gridcol}.} \item{panel.gridcol.x}{See \code{panel.gridcol}.} \item{panel.gridcol.y}{See \code{panel.gridcol}.} \item{panel.major.linetype}{line type for major grid lines} \item{panel.minor.linetype}{line type for minor grid lines} \item{plot.backcol}{Color of the plot's background} \item{plot.bordercol}{Color of whole plot's border (panel border)} \item{plot.col}{Color of both plot's region border and background. If set, overrides both \code{plot.backcol} and \code{plot.bordercol}.} \item{plot.margins}{numeric vector of length 4, indicating the top, right, bottom and left margin of the plot region.} \item{legend.pos}{position of the legend, if a legend is drawn. \describe{ \item{\emph{legend outside plot}}{ Use \code{"bottom"}, \code{"top"}, \code{"left"} or \code{"right"} to position the legend above, below, on the left or right side of the diagram. Right positioning is default. 
} \item{\emph{legend inside plot}}{ If \code{legend.inside = TRUE}, legend can be placed inside plot. Use \code{"top left"}, \code{"top right"}, \code{"bottom left"} and \code{"bottom right"} to position legend in any of these corners, or a two-element numeric vector with values from 0-1. See also \code{legend.inside}. } }} \item{legend.just}{justification of legend, relative to its position (\code{"center"} or two-element numeric vector with values from 0-1. By default (outside legend), justification is centered. If legend is inside and justification not specified, legend justification is set according to legend position.} \item{legend.inside}{logical, use \code{TRUE} to put legend inside the plotting area. See \code{legend.pos}.} \item{legend.size}{text size of the legend. Default is 1. Relative size, so recommended values are from 0.3 to 2.5} \item{legend.color}{Color of the legend labels} \item{legend.title.size}{text size of the legend title} \item{legend.title.color}{Color of the legend title} \item{legend.title.face}{font face of the legend title. By default, \code{"bold"} face is used.} \item{legend.backgroundcol}{fill color of the legend's background. Default is \code{"white"}, so no visible background is drawn.} \item{legend.bordercol}{Color of the legend's border. Default is \code{"white"}, so no visible border is drawn.} \item{legend.item.size}{size of legend's item (legend key), in centimetres.} \item{legend.item.backcol}{fill color of the legend's item-background. Default is \code{"grey90"}.} \item{legend.item.bordercol}{Color of the legend's item-border. Default is \code{"white"}.} } \value{ The customized theme object, or \code{NULL}, if a ggplot-theme was used. } \description{ Set global theme options for sjp-functions. } \examples{ \dontrun{ library(sjmisc) data(efc) # set sjPlot-defaults, a slightly modification # of the ggplot base theme set_theme() # legends of all plots inside set_theme(legend.pos = "top left", legend.inside = TRUE) plot_xtab(efc$e42dep, efc$e16sex) # Use classic-theme. you may need to # load the ggplot2-library. library(ggplot2) set_theme(base = theme_classic()) plot_frq(efc$e42dep) # adjust value labels set_theme( geom.label.size = 3.5, geom.label.color = "#3366cc", geom.label.angle = 90 ) # hjust-aes needs adjustment for this update_geom_defaults('text', list(hjust = -0.1)) plot_xtab(efc$e42dep, efc$e16sex, vjust = "center", hjust = "center") # Create own theme based on classic-theme set_theme( base = theme_classic(), axis.linecolor = "grey50", axis.textcolor = "#6699cc" ) plot_frq(efc$e42dep)} } \seealso{ \code{\link{sjPlot-themes}} } sjPlot/man/tab_corr.Rd0000644000176200001440000001447314051650703014374 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_corr.R \name{tab_corr} \alias{tab_corr} \title{Summary of correlations as HTML table} \usage{ tab_corr( data, na.deletion = c("listwise", "pairwise"), corr.method = c("pearson", "spearman", "kendall"), title = NULL, var.labels = NULL, wrap.labels = 40, show.p = TRUE, p.numeric = FALSE, fade.ns = TRUE, val.rm = NULL, digits = 3, triangle = "both", string.diag = NULL, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE ) } \arguments{ \item{data}{Matrix with correlation coefficients as returned by the \code{\link{cor}}-function, or a \code{data.frame} of variables where correlations between columns should be computed.} \item{na.deletion}{Indicates how missing values are treated. 
May be either \code{"listwise"} (default) or \code{"pairwise"}. May be abbreviated.} \item{corr.method}{Indicates the correlation computation method. May be one of \code{"pearson"} (default), \code{"spearman"} or \code{"kendall"}. May be abbreviated.} \item{title}{String, will be used as table caption.} \item{var.labels}{Character vector with variable names, which will be used to label variables in the output.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{show.p}{Logical, if \code{TRUE}, p-values are also printed.} \item{p.numeric}{Logical, if \code{TRUE}, the p-values are printed as numbers. If \code{FALSE} (default), asterisks are used.} \item{fade.ns}{Logical, if \code{TRUE} (default), non-significant correlation-values appear faded (by using a lighter grey text color). See 'Note'.} \item{val.rm}{Specify a number between 0 and 1 to suppress the output of correlation values that are smaller than \code{val.rm}. The absolute correlation values are used, so a correlation value of \code{-.5} would be greater than \code{val.rm = .4} and thus not be omitted. By default, this argument is \code{NULL}, hence all values are shown in the table. If a correlation value is below the specified value of \code{val.rm}, it is still printed to the HTML table, but made "invisible" with white foreground color. You can use the \code{CSS} argument (\code{"css.valueremove"}) to change color and appearance of those correlation values that are smaller than the limit specified by \code{val.rm}.} \item{digits}{Amount of decimals for estimates.} \item{triangle}{Indicates whether only the upper right (use \code{"upper"}), lower left (use \code{"lower"}) or both (use \code{"both"}) triangles of the correlation table are filled with values. Default is \code{"both"}. You can specify the initial letter only.} \item{string.diag}{A vector with string values of the same length as \code{ncol(data)} (number of correlated items) that can be used to display content in the diagonal cells where row and column item are identical (i.e. the "self-correlation"). By default, this argument is \code{NULL} and the diagonal cells are empty.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parentheses for html-tags.
The html-source may look less pretty, but it may help when exporting html-tables to office tools.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}) and \item the html-table with inline-css for use with knitr (\code{knitr}) } for further use. } \description{ Shows the results of a computed correlation as HTML table. Requires either a \code{\link{data.frame}} or a matrix with correlation coefficients as returned by the \code{\link{cor}}-function. } \note{ If \code{data} is a matrix with correlation coefficients as returned by the \code{\link{cor}}-function, p-values can't be computed. Thus, \code{show.p}, \code{p.numeric} and \code{fade.ns} only have an effect if \code{data} is a \code{\link{data.frame}}. } \examples{ \dontrun{ if (interactive()) { # Data from the EUROFAMCARE sample dataset library(sjmisc) data(efc) # retrieve variable and value labels varlabs <- get_label(efc) # recveive first item of COPE-index scale start <- which(colnames(efc) == "c83cop2") # recveive last item of COPE-index scale end <- which(colnames(efc) == "c88cop7") # create data frame with COPE-index scale mydf <- data.frame(efc[, c(start:end)]) colnames(mydf) <- varlabs[c(start:end)] # we have high correlations here, because all items # belong to one factor. tab_corr(mydf, p.numeric = TRUE) # auto-detection of labels, only lower triangle tab_corr(efc[, c(start:end)], triangle = "lower") # auto-detection of labels, only lower triangle, all correlation # values smaller than 0.3 are not shown in the table tab_corr(efc[, c(start:end)], triangle = "lower", val.rm = 0.3) # auto-detection of labels, only lower triangle, all correlation # values smaller than 0.3 are printed in blue tab_corr(efc[, c(start:end)], triangle = "lower",val.rm = 0.3, CSS = list(css.valueremove = 'color:blue;')) }} } sjPlot/man/plot_gpt.Rd0000644000176200001440000001301413776334350014431 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_gpt.R \name{plot_gpt} \alias{plot_gpt} \title{Plot grouped proportional tables} \usage{ plot_gpt( data, x, y, grp, colors = "metro", geom.size = 2.5, shape.fill.color = "#f0f0f0", shapes = c(15, 16, 17, 18, 21, 22, 23, 24, 25, 7, 8, 9, 10, 12), title = NULL, axis.labels = NULL, axis.titles = NULL, legend.title = NULL, legend.labels = NULL, wrap.title = 50, wrap.labels = 15, wrap.legend.title = 20, wrap.legend.labels = 20, axis.lim = NULL, grid.breaks = NULL, show.total = TRUE, annotate.total = TRUE, show.p = TRUE, show.n = TRUE ) } \arguments{ \item{data}{A data frame, or a grouped data frame.} \item{x}{Categorical variable, where the proportion of each category in \code{x} for the highest category of \code{y} will be printed along the x-axis.} \item{y}{Categorical or numeric variable. If not a binary variable, \code{y} will be recoded into a binary variable, dichtomized at the highest category and all remaining categories.} \item{grp}{Grouping variable, which will define the y-axis} \item{colors}{May be a character vector of color values in hex-format, valid color value names (see \code{demo("colors")}) or a name of a pre-defined color palette. Following options are valid for the \code{colors} argument: \itemize{ \item If not specified, a default color brewer palette will be used, which is suitable for the plot style. \item If \code{"gs"}, a greyscale will be used. 
\item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). \item If \code{colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. \item There are some pre-defined color palettes in this package, see \code{\link{sjPlot-themes}} for details. \item Else specify own color values or names as vector (e.g. \code{colors = "#00ff00"} or \code{colors = c("firebrick", "blue")}). }} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{shape.fill.color}{Optional color vector, fill-color for non-filled shapes} \item{shapes}{Numeric vector with shape styles, used to map the different categories of \code{x}.} \item{title}{Character vector, used as plot title. By default, \code{\link[sjlabelled]{response_labels}} is called to retrieve the label of the dependent variable, which will be used as title. Use \code{title = ""} to remove title.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{axis.titles}{character vector of length one or two, defining the title(s) for the x-axis and y-axis.} \item{legend.title}{Character vector, used as legend title for plots that have a legend.} \item{legend.labels}{character vector with labels for the guide/legend.} \item{wrap.title}{Numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{wrap.legend.title}{numeric, determines how many chars of the legend's title are displayed in one line and when a line break is inserted.} \item{wrap.legend.labels}{numeric, determines how many chars of the legend labels are displayed in one line and when a line break is inserted.} \item{axis.lim}{Numeric vector of length 2, defining the range of the plot axis. Depending on plot type, may effect either x- or y-axis, or both. For multiple plot outputs (e.g., from \code{type = "eff"} or \code{type = "slope"} in \code{\link{plot_model}}), \code{axis.lim} may also be a list of vectors of length 2, defining axis limits for each plot (only if non-faceted).} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid is being printed.} \item{show.total}{Logical, if \code{TRUE}, a total summary line for all aggregated \code{grp} is added.} \item{annotate.total}{Logical, if \code{TRUE} and \code{show.total = TRUE}, the total-row in the figure will be highlighted with a slightly shaded background.} \item{show.p}{Logical, adds significance levels to values, or value and variable labels.} \item{show.n}{logical, if \code{TRUE}, adds total number of cases for each group or category to the labels.} } \value{ A ggplot-object. } \description{ Plot grouped proportional crosstables, where the proportion of each level of \code{x} for the highest category in \code{y} is plotted, for each subgroup of \code{grp}. 
} \details{ The p-values are based on \code{\link[stats]{chisq.test}} of \code{x} and \code{y} for each \code{grp}. } \examples{ data(efc) # the proportion of dependency levels in female # elderly, for each family carer's relationship # to elderly plot_gpt(efc, e42dep, e16sex, e15relat) # proportion of educational levels in highest # dependency category of elderly, for different # care levels plot_gpt(efc, c172code, e42dep, n4pstu) } sjPlot/man/efc.Rd0000644000176200001440000000033113446531455013333 0ustar liggesusers\docType{data} \name{efc} \alias{efc} \title{Sample dataset from the EUROFAMCARE project} \description{ A SPSS sample data set, imported with the \code{\link[sjlabelled]{read_spss}} function. } \keyword{data} sjPlot/man/plot_model.Rd0000644000176200001440000007211614073077247014747 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_model.R \name{plot_model} \alias{plot_model} \alias{get_model_data} \title{Plot regression models} \usage{ plot_model( model, type = c("est", "re", "eff", "emm", "pred", "int", "std", "std2", "slope", "resid", "diag"), transform, terms = NULL, sort.est = NULL, rm.terms = NULL, group.terms = NULL, order.terms = NULL, pred.type = c("fe", "re"), mdrt.values = c("minmax", "meansd", "zeromax", "quart", "all"), ri.nr = NULL, title = NULL, axis.title = NULL, axis.labels = NULL, legend.title = NULL, wrap.title = 50, wrap.labels = 25, axis.lim = NULL, grid.breaks = NULL, ci.lvl = NULL, se = NULL, robust = FALSE, vcov.fun = NULL, vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5"), vcov.args = NULL, colors = "Set1", show.intercept = FALSE, show.values = FALSE, show.p = TRUE, show.data = FALSE, show.legend = TRUE, show.zeroinf = TRUE, value.offset = NULL, value.size, jitter = NULL, digits = 2, dot.size = NULL, line.size = NULL, vline.color = NULL, p.threshold = c(0.05, 0.01, 0.001), p.adjust = NULL, grid, case, auto.label = TRUE, prefix.labels = c("none", "varname", "label"), bpe = "median", bpe.style = "line", bpe.color = "white", ci.style = c("whisker", "bar"), ... ) get_model_data( model, type = c("est", "re", "eff", "pred", "int", "std", "std2", "slope", "resid", "diag"), transform, terms = NULL, sort.est = NULL, rm.terms = NULL, group.terms = NULL, order.terms = NULL, pred.type = c("fe", "re"), ri.nr = NULL, ci.lvl = NULL, colors = "Set1", grid, case = "parsed", digits = 2, ... ) } \arguments{ \item{model}{A regression model object. Depending on the \code{type}, many kinds of models are supported, e.g. from packages like \pkg{stats}, \pkg{lme4}, \pkg{nlme}, \pkg{rstanarm}, \pkg{survey}, \pkg{glmmTMB}, \pkg{MASS}, \pkg{brms} etc.} \item{type}{Type of plot. There are three groups of plot-types: \cr \cr \emph{Coefficients} (\href{https://strengejacke.github.io/sjPlot/articles/plot_model_estimates.html}{related vignette}) \describe{ \item{\code{type = "est"}}{Forest-plot of estimates. If the fitted model only contains one predictor, slope-line is plotted.} \item{\code{type = "re"}}{For mixed effects models, plots the random effects.} \item{\code{type = "std"}}{Forest-plot of standardized coefficients.} \item{\code{type = "std2"}}{Forest-plot of standardized coefficients, however, standardization is done by dividing by two SD (see 'Details').} } \emph{Marginal Effects} (\href{https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html}{related vignette}) \describe{ \item{\code{type = "pred"}}{Predicted values (marginal effects) for specific model terms. 
See \code{\link[ggeffects]{ggpredict}} for details.} \item{\code{type = "eff"}}{Similar to \code{type = "pred"}, however, discrete predictors are held constant at their proportions (not reference level). See \code{\link[ggeffects]{ggeffect}} for details.} \item{\code{type = "emm"}}{Similar to \code{type = "eff"}, see \code{\link[ggeffects]{ggemmeans}} for details.} \item{\code{type = "int"}}{Marginal effects of interaction terms in \code{model}.} } \emph{Model diagnostics} \describe{ \item{\code{type = "slope"}}{Slope of coefficients for each single predictor, against the response (linear relationship between each model term and response). See 'Details'.} \item{\code{type = "resid"}}{Slope of coefficients for each single predictor, against the residuals (linear relationship between each model term and residuals). See 'Details'.} \item{\code{type = "diag"}}{Check model assumptions. See 'Details'.} } \strong{Note:} For mixed models, the diagnostic plots like linear relationship or check for Homoscedasticity, do \strong{not} take the uncertainty of random effects into account, but is only based on the fixed effects part of the model.} \item{transform}{A character vector, naming a function that will be applied on estimates and confidence intervals. By default, \code{transform} will automatically use \code{"exp"} as transformation for applicable classes of \code{model} (e.g. logistic or poisson regression). Estimates of linear models remain untransformed. Use \code{NULL} if you want the raw, non-transformed estimates.} \item{terms}{Character vector with the names of those terms from \code{model} that should be plotted. This argument depends on the plot-type: \describe{ \item{\emph{Coefficients}}{Select terms that should be plotted. All other term are removed from the output. Note that the term names must match the names of the model's coefficients. For factors, this means that the variable name is suffixed with the related factor level, and each category counts as one term. E.g. \code{rm.terms = "t_name [2,3]"} would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} is categorical and has at least the factor levels \code{2} and \code{3}). Another example for the \emph{iris}-dataset: \code{terms = "Species"} would not work, instead you would write \code{terms = "Species [versicolor,virginica]"} to remove these two levels, or \code{terms = "Speciesversicolor"} if you just want to remove the level \emph{versicolor} from the plot.} \item{\emph{Marginal Effects}}{Here \code{terms} indicates for which terms marginal effects should be displayed. At least one term is required to calculate effects, maximum length is three terms, where the second and third term indicate the groups, i.e. predictions of first term are grouped by the levels of the second (and third) term. \code{terms} may also indicate higher order terms (e.g. interaction terms). Indicating levels in square brackets allows for selecting only specific groups. Term name and levels in brackets must be separated by a whitespace character, e.g. \code{terms = c("age", "education [1,3]")}. It is also possible to specify a range of numeric values for the predictions with a colon, for instance \code{terms = c("education [1,3]", "age [30:50]")}. Furthermore, it is possible to specify a function name. Values for predictions will then be transformed, e.g. \code{terms = "income [exp]"}. 
This is useful when model predictors were transformed for fitting the model and should be back-transformed to the original scale for predictions. Finally, numeric vectors for which no specific values are given, a "pretty range" is calculated, to avoid memory allocation problems for vectors with many unique values. If a numeric vector is specified as second or third term (i.e. if this vector represents a grouping structure), representative values (see \code{\link[ggeffects]{values_at}}) are chosen. If all values for a numeric vector should be used to compute predictions, you may use e.g. terms = "age [all]". For more details, see \code{\link[ggeffects]{ggpredict}}.} }} \item{sort.est}{Determines in which way estimates are sorted in the plot: \itemize{ \item If \code{NULL} (default), no sorting is done and estimates are sorted in the same order as they appear in the model formula. \item If \code{TRUE}, estimates are sorted in descending order, with highest estimate at the top. \item If \code{sort.est = "sort.all"}, estimates are re-sorted for each coefficient (only applies if \code{type = "re"} and \code{grid = FALSE}), i.e. the estimates of the random effects for each predictor are sorted and plotted to an own plot. \item If \code{type = "re"}, specify a predictor's / coefficient's name to sort estimates according to this random effect. }} \item{rm.terms}{Character vector with names that indicate which terms should be removed from the plot. Counterpart to \code{terms}. \code{rm.terms = "t_name"} would remove the term \emph{t_name}. Default is \code{NULL}, i.e. all terms are used. For factors, levels that should be removed from the plot need to be explicitely indicated in square brackets, and match the model's coefficient names, e.g. \code{rm.terms = "t_name [2,3]"} would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} was categorical and has at least the factor levels \code{2} and \code{3}). Another example for the \emph{iris} dataset would be \code{rm.terms = "Species [versicolor,virginica]"}. Note that the \code{rm.terms}-argument does not apply to \emph{Marginal Effects} plots.} \item{group.terms}{Numeric vector with group indices, to group coefficients. Each group of coefficients gets its own color (see 'Examples').} \item{order.terms}{Numeric vector, indicating in which order the coefficients should be plotted. See examples in \href{https://strengejacke.github.io/sjPlot/articles/plot_model_estimates.html}{this package-vignette}.} \item{pred.type}{Character, only applies for \emph{Marginal Effects} plots with mixed effects models. Indicates whether predicted values should be conditioned on random effects (\code{pred.type = "re"}) or fixed effects only (\code{pred.type = "fe"}, the default). For details, see documentation of the \code{type}-argument in \code{\link[ggeffects]{ggpredict}}.} \item{mdrt.values}{Indicates which values of the moderator variable should be used when plotting interaction terms (i.e. \code{type = "int"}). \describe{ \item{\code{"minmax"}}{(default) minimum and maximum values (lower and upper bounds) of the moderator are used to plot the interaction between independent variable and moderator(s).} \item{\code{"meansd"}}{uses the mean value of the moderator as well as one standard deviation below and above mean value to plot the effect of the moderator on the independent variable (following the convention suggested by Cohen and Cohen and popularized by Aiken and West (1991), i.e. 
using the mean, the value one standard deviation above, and the value one standard deviation below the mean as values of the moderator, see \href{https://www.theanalysisfactor.com/3-tips-interpreting-moderation/}{Grace-Martin K: 3 Tips to Make Interpreting Moderation Effects Easier}).} \item{\code{"zeromax"}}{is similar to the \code{"minmax"} option, however, \code{0} is always used as minimum value for the moderator. This may be useful for predictors that don't have an empirical zero-value, but absence of moderation should be simulated by using 0 as minimum.} \item{\code{"quart"}}{calculates and uses the quartiles (lower, median and upper) of the moderator value.} \item{\code{"all"}}{uses all values of the moderator variable.} }} \item{ri.nr}{Numeric vector. If \code{type = "re"} and fitted model has more than one random intercept, \code{ri.nr} indicates which random effects of which random intercept (or: which list elements of \code{\link[lme4]{ranef}}) will be plotted. Default is \code{NULL}, so all random effects will be plotted.} \item{title}{Character vector, used as plot title. By default, \code{\link[sjlabelled]{response_labels}} is called to retrieve the label of the dependent variable, which will be used as title. Use \code{title = ""} to remove title.} \item{axis.title}{Character vector of length one or two (depending on the plot function and type), used as title(s) for the x and y axis. If not specified, a default labelling is chosen. \strong{Note:} Some plot types may not support this argument sufficiently. In such cases, use the returned ggplot-object and add axis titles manually with \code{\link[ggplot2]{labs}}. Use \code{axis.title = ""} to remove axis titles.} \item{axis.labels}{Character vector with labels for the model terms, used as axis labels. By default, \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels of the coefficients, which will be used as axis labels. Use \code{axis.labels = ""} or \code{auto.label = FALSE} to use the variable names as labels instead. If \code{axis.labels} is a named vector, axis labels (by default, the names of the model's coefficients) will be matched with the names of \code{axis.label}. This ensures that labels always match the related axis value, no matter in which way axis labels are sorted.} \item{legend.title}{Character vector, used as legend title for plots that have a legend.} \item{wrap.title}{Numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{axis.lim}{Numeric vector of length 2, defining the range of the plot axis. Depending on plot-type, may effect either x- or y-axis. For \emph{Marginal Effects} plots, \code{axis.lim} may also be a list of two vectors of length 2, defining axis limits for both the x and y axis.} \item{grid.breaks}{Numeric value or vector; if \code{grid.breaks} is a single value, sets the distance between breaks for the axis at every \code{grid.breaks}'th position, where a major grid line is plotted. If \code{grid.breaks} is a vector, values will be used to define the axis positions of the major grid lines.} \item{ci.lvl}{Numeric, the level of the confidence intervals (error bars). Use \code{ci.lvl = NA} to remove error bars. 
For \code{stanreg}-models, \code{ci.lvl} defines the (outer) probability for the \emph{credible interval} that is plotted (see \code{\link[bayestestR]{ci}}). By default, \code{stanreg}-models are printed with two intervals: the "inner" interval, which defaults to the 50\%-CI; and the "outer" interval, which defaults to the 89\%-CI. \code{ci.lvl} affects only the outer interval in such cases. See \code{prob.inner} and \code{prob.outer} under the \code{...}-argument for more details.} \item{se}{Logical, if \code{TRUE}, the standard errors are also printed. If robust standard errors are required, use arguments \code{vcov.fun}, \code{vcov.type} and \code{vcov.args} (see \code{\link[parameters]{standard_error_robust}} and \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} for details), or use argument \code{robust} as shortcut. \code{se} overrides \code{ci.lvl}: if not \code{NULL}, arguments \code{ci.lvl} and \code{transform} will be ignored. Currently, \code{se} only applies to \emph{Coefficients} plots.} \item{robust}{Logical, shortcut for arguments \code{vcov.fun} and \code{vcov.type}. If \code{TRUE}, uses \code{vcov.fun = "vcovHC"} and \code{vcov.type = "HC3"} as default, that is, \code{\link[sandwich]{vcovHC}} with default-type is called (see \code{\link[parameters]{standard_error_robust}} and \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} for further details).} \item{vcov.fun}{Character vector, indicating the name of the \code{vcov*()}-function from the \pkg{sandwich} or \pkg{clubSandwich} package, e.g. \code{vcov.fun = "vcovCL"}, if robust standard errors are required.} \item{vcov.type}{Character vector, specifying the estimation type for the robust covariance matrix estimation (see \code{\link[sandwich:vcovHC]{vcovHC()}} or \code{clubSandwich::vcovCR()} for details).} \item{vcov.args}{List of named vectors, used as additional arguments that are passed down to \code{vcov.fun}.} \item{colors}{May be a character vector of color values in hex-format, valid color value names (see \code{demo("colors")}) or a name of a pre-defined color palette. Following options are valid for the \code{colors} argument: \itemize{ \item If not specified, a default color brewer palette will be used, which is suitable for the plot style. \item If \code{"gs"}, a greyscale will be used. \item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). \item If \code{colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. \item There are some pre-defined color palettes in this package, see \code{\link{sjPlot-themes}} for details. \item Else specify own color values or names as vector (e.g. \code{colors = "#00ff00"} or \code{colors = c("firebrick", "blue")}). }} \item{show.intercept}{Logical, if \code{TRUE}, the intercept of the fitted model is also plotted. Default is \code{FALSE}. 
If \code{transform = "exp"}, please note that due to the exponential transformation of estimates, the intercept in some cases is non-finite and the plot cannot be created.} \item{show.values}{Logical, whether values should be plotted or not.} \item{show.p}{Logical, adds asterisks that indicate the significance level of estimates to the value labels.} \item{show.data}{Logical, for \emph{Marginal Effects} plots, also plots the raw data points.} \item{show.legend}{For \emph{Marginal Effects} plots, shows or hides the legend.} \item{show.zeroinf}{Logical, if \code{TRUE}, shows the zero-inflation part of hurdle- or zero-inflated models.} \item{value.offset}{Numeric, offset for text labels to adjust their position relative to the dots or lines.} \item{value.size}{Numeric, indicates the size of value labels. Can be used for all plot types where the argument \code{show.values} is applicable, e.g. \code{value.size = 4}.} \item{jitter}{Numeric, between 0 and 1. If \code{show.data = TRUE}, you can add a small amount of random variation to the location of each data point. \code{jitter} then indicates the width, i.e. how much of a bin's width will be occupied by the jittered values.} \item{digits}{Numeric, amount of digits after decimal point when rounding estimates or values.} \item{dot.size}{Numeric, size of the dots that indicate the point estimates.} \item{line.size}{Numeric, size of the lines that indicate the error bars.} \item{vline.color}{Color of the vertical "zero effect" line. Default color is inherited from the current theme.} \item{p.threshold}{Numeric vector of length 3, indicating the threshold for annotating p-values with asterisks. Only applies if \code{p.style = "asterisk"}.} \item{p.adjust}{Character vector, if not \code{NULL}, indicates the method to adjust p-values. See \code{\link[stats]{p.adjust}} for details.} \item{grid}{Logical, if \code{TRUE}, multiple plots are plotted in a grid layout.} \item{case}{Desired target case. Labels will automatically be converted into the specified character case. See \code{snakecase::to_any_case()} for more details on this argument. By default, if \code{case} is not specified, it will be set to \code{"parsed"}, unless \code{prefix.labels} is not \code{"none"}. If \code{prefix.labels} is either \code{"label"} (or \code{"l"}) or \code{"varname"} (or \code{"v"}) and \code{case} is not specified, it will be set to \code{NULL} - this is a more convenient default when prefixing labels.} \item{auto.label}{Logical, if \code{TRUE} (the default), and \href{https://strengejacke.github.io/sjlabelled/articles/intro_sjlabelled.html}{data is labelled}, \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels of the coefficients, which will be used as predictor labels. If data is not labelled, \href{https://easystats.github.io/parameters/reference/format_parameters.html}{format_parameters()} is used to create pretty labels. If \code{auto.label = FALSE}, original variable names and value labels (factor levels) are used.} \item{prefix.labels}{Indicates whether the value labels of categorical variables should be prefixed, e.g. with the variable name or variable label. See argument \code{prefix} in \code{\link[sjlabelled]{term_labels}} for details.} \item{bpe}{For \strong{Stan}-models (fitted with the \pkg{rstanarm}- or \pkg{brms}-package), the Bayesian point estimate is, by default, the median of the posterior distribution. Use \code{bpe} to define other functions to calculate the Bayesian point estimate.
\code{bpe} needs to be a character naming the specific function, which is passed to the \code{fun}-argument in \code{\link[sjmisc]{typical_value}}. So, \code{bpe = "mean"} would calculate the mean value of the posterior distribution.} \item{bpe.style}{For \strong{Stan}-models (fitted with the \pkg{rstanarm}- or \pkg{brms}-package), the Bayesian point estimate is indicated as a small, vertical line by default. Use \code{bpe.style = "dot"} to plot a dot instead of a line for the point estimate.} \item{bpe.color}{Character vector, indicating the color of the Bayesian point estimate. Setting \code{bpe.color = NULL} will inherit the color from the mapped aesthetic to match it with the geom's color.} \item{ci.style}{Character vector, defining whether inner and outer intervals for Bayesian models are shown in boxplot-style (\code{"whisker"}) or in bars with different alpha-levels (\code{"bar"}).} \item{...}{Other arguments, passed down to various functions. Here is a list of supported arguments and their description in detail. \describe{ \item{\code{prob.inner} and \code{prob.outer}}{For \strong{Stan}-models (fitted with the \pkg{rstanarm}- or \pkg{brms}-package) and coefficients plot-types, you can specify numeric values between 0 and 1 for \code{prob.inner} and \code{prob.outer}, which will then be used as inner and outer probabilities for the uncertainty intervals (HDI). By default, the inner probability is 0.5 and the outer probability is 0.89 (unless \code{ci.lvl} is specified - in this case, \code{ci.lvl} is used as outer probability). } \item{\code{size.inner}}{For \strong{Stan}-models and \emph{Coefficients} plot-types, you can specify the width of the bar for the inner probabilities. Default is \code{0.1}. Setting \code{size.inner = 0} removes the inner probability regions. } \item{\code{width}, \code{alpha}, and \code{scale}}{Passed down to \code{geom_errorbar()} or \code{geom_density_ridges()}, for forest or diagnostic plots. } \item{\code{width}, \code{alpha}, \code{dot.alpha}, \code{dodge} and \code{log.y}}{Passed down to \code{\link[ggeffects]{plot.ggeffects}} for \emph{Marginal Effects} plots. } \item{\code{show.loess}}{Logical, for diagnostic plot-types \code{"slope"} and \code{"resid"}, adds (or hides) a loess-smoothed line to the plot. } \item{\emph{Marginal Effects} plot-types}{When plotting marginal effects, arguments are also passed down to \code{\link[ggeffects]{ggpredict}}, \code{\link[ggeffects]{ggeffect}} or \code{\link[ggeffects]{plot.ggeffects}}. } \item{Case conversion of labels}{For case conversion of labels (see argument \code{case}), arguments \code{sep_in} and \code{sep_out} will be passed down to \code{snakecase::to_any_case()}. This only applies to automatically retrieved term labels, \emph{not} if term labels are provided by the \code{axis.labels}-argument. } }} } \value{ Depending on the plot-type, \code{plot_model()} returns a \code{ggplot}-object or a list of such objects. \code{get_model_data} returns the associated data with the plot-object as a tidy data frame, or (depending on the plot-type) a list of such data frames. } \description{ \code{plot_model()} creates plots from regression models, either estimates (as so-called forest or dot whisker plots) or marginal effects. } \details{ \subsection{Different Plot Types}{ \describe{ \item{\code{type = "std"}}{Plots standardized estimates.
See details below.} \item{\code{type = "std2"}}{Plots standardized estimates, however, standardization follows Gelman's (2008) suggestion, rescaling the estimates by dividing them by two standard deviations instead of just one. Resulting coefficients are then directly comparable for untransformed binary predictors. } \item{\code{type = "pred"}}{Plots estimated marginal means (or marginal effects). Simply wraps \code{\link[ggeffects]{ggpredict}}. See also \href{https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html}{this package-vignette}. } \item{\code{type = "eff"}}{Plots estimated marginal means (or marginal effects). Simply wraps \code{\link[ggeffects]{ggeffect}}. See also \href{https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html}{this package-vignette}. } \item{\code{type = "int"}}{A shortcut for marginal effects plots, where interaction terms are automatically detected and used as \code{terms}-argument. Furthermore, if the moderator variable (the second - and third - term in an interaction) is continuous, \code{type = "int"} automatically chooses useful values based on the \code{mdrt.values}-argument, which are passed to \code{terms}. Then, \code{\link[ggeffects]{ggpredict}} is called. \code{type = "int"} plots the interaction term that appears first in the formula along the x-axis, while the second (and possibly third) variable in an interaction is used as grouping factor(s) (moderating variable). Use \code{type = "pred"} or \code{type = "eff"} and specify a certain order in the \code{terms}-argument to indicate which variable(s) should be used as moderator. See also \href{https://strengejacke.github.io/sjPlot/articles/plot_interactions.html}{this package-vignette}. } \item{\code{type = "slope"} and \code{type = "resid"}}{Simple diagnostic-plots, where a linear model for each single predictor is plotted against the response variable, or the model's residuals. Additionally, a loess-smoothed line is added to the plot. The main purpose of these plots is to check whether the relationship between outcome (or residuals) and a predictor is roughly linear or not. Since the plots are based on a simple linear regression with only one model predictor at the moment, the slopes (i.e. coefficients) may differ from the coefficients of the complete model. } \item{\code{type = "diag"}}{For \strong{Stan-models}, plots the prior versus posterior samples. For \strong{linear (mixed) models}, plots for multicollinearity-check (Variance Inflation Factors), QQ-plots, checks for normal distribution of residuals and homoscedasticity (constant variance of residuals) are shown. For \strong{generalized linear mixed models}, returns the QQ-plot for random effects. } } } \subsection{Standardized Estimates}{ Default standardization is done by completely refitting the model on the standardized data. Hence, this approach is equal to standardizing the variables before fitting the model, which is particularly recommended for complex models that include interactions or transformations (e.g., polynomial or spline terms). When \code{type = "std2"}, standardization of estimates follows \href{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf}{Gelman's (2008)} suggestion, rescaling the estimates by dividing them by two standard deviations instead of just one. Resulting coefficients are then directly comparable for untransformed binary predictors. 
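For instance, a minimal sketch (re-using the model \code{m} from the 'Examples' section below):

\preformatted{
# forest plot of standardized estimates (model refitted on standardized data)
plot_model(m, type = "std")

# Gelman-style standardization: estimates divided by two standard deviations
plot_model(m, type = "std2")
}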
} } \examples{ # prepare data library(sjmisc) data(efc) efc <- to_factor(efc, c161sex, e42dep, c172code) m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc) # simple forest plot plot_model(m) # grouped coefficients plot_model(m, group.terms = c(1, 2, 3, 3, 3, 4, 4)) # keep only selected terms in the model: pos_v_4, the # levels 3 and 4 of factor e42dep and levels 2 and 3 for c172code plot_model(m, terms = c("pos_v_4", "e42dep [3,4]", "c172code [2,3]")) # multiple plots, as returned from "diagnostic"-plot type, # can be arranged with 'plot_grid()' \dontrun{ p <- plot_model(m, type = "diag") plot_grid(p)} # plot random effects if (require("lme4") && require("glmmTMB")) { m <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy) plot_model(m, type = "re") # plot marginal effects plot_model(m, type = "pred", terms = "Days") } # plot interactions \dontrun{ m <- glm( tot_sc_e ~ c161sex + c172code * neg_c_7, data = efc, family = poisson() ) # type = "int" automatically selects groups for continuous moderator # variables - see argument 'mdrt.values'. The following function call is # identical to: # plot_model(m, type = "pred", terms = c("c172code", "neg_c_7 [7,28]")) plot_model(m, type = "int") # switch moderator plot_model(m, type = "pred", terms = c("neg_c_7", "c172code")) # same as # ggeffects::ggpredict(m, terms = c("neg_c_7", "c172code"))} # plot Stan-model \dontrun{ if (require("rstanarm")) { data(mtcars) m <- stan_glm(mpg ~ wt + am + cyl + gear, data = mtcars, chains = 1) plot_model(m, bpe.style = "dot") }} } \references{ Gelman A (2008) "Scaling regression inputs by dividing by two standard deviations." \emph{Statistics in Medicine 27: 2865-2873.} \url{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf} \cr \cr Aiken and West (1991). Multiple Regression: Testing and Interpreting Interactions. } sjPlot/man/sjp.aov1.Rd0000644000176200001440000001125013733137536014242 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotAnova.R \name{sjp.aov1} \alias{sjp.aov1} \title{Plot One-Way-Anova tables} \usage{ sjp.aov1( var.dep, var.grp, meansums = FALSE, title = NULL, axis.labels = NULL, rev.order = FALSE, string.interc = "(Intercept)", axis.title = "", axis.lim = NULL, geom.colors = c("#3366a0", "#aa3333"), geom.size = 3, wrap.title = 50, wrap.labels = 25, grid.breaks = NULL, show.values = TRUE, digits = 2, y.offset = 0.15, show.p = TRUE, show.summary = FALSE ) } \arguments{ \item{var.dep}{Dependent variable. Will be used with following formula: \code{aov(var.dep ~ var.grp)}} \item{var.grp}{Factor with the cross-classifying variable, where \code{var.dep} is grouped into the categories represented by \code{var.grp}.} \item{meansums}{Logical, if \code{TRUE}, the values reported are the true group mean values. If \code{FALSE} (default), the values are reported in the standard way, i.e. the values indicate the difference of the group mean in relation to the intercept (reference group).} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{axis.labels}{character vector with labels used as axis labels. 
Optional argument, since in most cases, axis labels are set automatically.} \item{rev.order}{Logical, if \code{TRUE}, the order of categories (groups) is reversed.} \item{string.interc}{Character vector that indicates the reference group (intercept), that is appended to the value label of the grouping variable. Default is \code{"(Intercept)"}.} \item{axis.title}{Character vector of length one or two (depending on the plot function and type), used as title(s) for the x and y axis. If not specified, a default labelling is chosen. \strong{Note:} Some plot types may not support this argument sufficiently. In such cases, use the returned ggplot-object and add axis titles manually with \code{\link[ggplot2]{labs}}. Use \code{axis.title = ""} to remove axis titles.} \item{axis.lim}{Numeric vector of length 2, defining the range of the plot axis. Depending on plot type, may affect either the x- or y-axis, or both. For multiple plot outputs (e.g., from \code{type = "eff"} or \code{type = "slope"} in \code{\link{plot_model}}), \code{axis.lim} may also be a list of vectors of length 2, defining axis limits for each plot (only if non-faceted).} \item{geom.colors}{user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{wrap.title}{numeric, determines how many chars of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{grid.breaks}{numeric; sets the distance between breaks for the axis, i.e. at every \code{grid.breaks}'th position a major grid line is printed.} \item{show.values}{Logical, whether values should be plotted or not.} \item{digits}{Numeric, amount of digits after decimal point when rounding estimates or values.} \item{y.offset}{numeric, offset for text labels when their alignment is adjusted to the top/bottom of the geom (see \code{hjust} and \code{vjust}).} \item{show.p}{Logical, adds significance levels to values, or value and variable labels.} \item{show.summary}{logical, if \code{TRUE} (default), a summary with chi-squared statistics (see \code{\link{chisq.test}}), Cramer's V or Phi-value etc. is shown. If a cell contains expected values lower than five (or lower than 10 if df is 1), Fisher's exact test (see \code{\link{fisher.test}}) is computed instead of the chi-squared test. If the table's matrix is larger than 2x2, Fisher's exact test with Monte Carlo simulation is computed.} } \value{ A ggplot-object. } \description{ Plot One-Way-Anova table sum of squares (SS) of each factor level (group) against the dependent variable. The SS of the factor variable against the dependent variable (variance within and between groups) is printed to the model summary. } \examples{ data(efc) # note: "var.grp" does not need to be a factor.
# coercion to factor is done by the function sjp.aov1(efc$c12hour, efc$e42dep) } sjPlot/man/tab_xtab.Rd0000644000176200001440000002023414051650703014355 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_xtab.R \name{tab_xtab} \alias{tab_xtab} \alias{sjt.xtab} \title{Summary of contingency tables as HTML table} \usage{ tab_xtab( var.row, var.col, weight.by = NULL, title = NULL, var.labels = NULL, value.labels = NULL, wrap.labels = 20, show.obs = TRUE, show.cell.prc = FALSE, show.row.prc = FALSE, show.col.prc = FALSE, show.exp = FALSE, show.legend = FALSE, show.na = FALSE, show.summary = TRUE, drop.empty = TRUE, statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"), string.total = "Total", digits = 1, tdcol.n = "black", tdcol.expected = "#339999", tdcol.cell = "#993333", tdcol.row = "#333399", tdcol.col = "#339933", emph.total = FALSE, emph.color = "#f8f8f8", prc.sign = " %", hundret = "100.0", CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE, ... ) sjt.xtab( var.row, var.col, weight.by = NULL, title = NULL, var.labels = NULL, value.labels = NULL, wrap.labels = 20, show.obs = TRUE, show.cell.prc = FALSE, show.row.prc = FALSE, show.col.prc = FALSE, show.exp = FALSE, show.legend = FALSE, show.na = FALSE, show.summary = TRUE, drop.empty = TRUE, statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"), string.total = "Total", digits = 1, tdcol.n = "black", tdcol.expected = "#339999", tdcol.cell = "#993333", tdcol.row = "#333399", tdcol.col = "#339933", emph.total = FALSE, emph.color = "#f8f8f8", prc.sign = " %", hundret = "100.0", CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE, ... ) } \arguments{ \item{var.row}{Variable that should be displayed in the table rows.} \item{var.col}{Variable that should be displayed in the table columns.} \item{weight.by}{Vector of weights that will be applied to weight all cases. Must be a vector of the same length as the input vector.
Default is \code{NULL}, so no weights are used.} \item{title}{String, will be used as table caption.} \item{var.labels}{Character vector with variable names, which will be used to label variables in the output.} \item{value.labels}{Character vector (or \code{list} of character vectors) with value labels of the supplied variables, which will be used to label variable values in the output.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{show.obs}{Logical, if \code{TRUE}, observed values are shown.} \item{show.cell.prc}{Logical, if \code{TRUE}, cell percentage values are shown.} \item{show.row.prc}{Logical, if \code{TRUE}, row percentage values are shown.} \item{show.col.prc}{Logical, if \code{TRUE}, column percentage values are shown.} \item{show.exp}{Logical, if \code{TRUE}, expected values are also shown.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{show.na}{logical, if \code{TRUE}, \code{\link{NA}}'s (missing values) are added to the output.} \item{show.summary}{Logical, if \code{TRUE}, a summary row with the chi-squared statistic, degrees of freedom, Cramer's V or Phi coefficient, and the p-value of the chi-squared statistic is shown.} \item{drop.empty}{Logical, if \code{TRUE} and the variable's values are labeled, values / factor levels with no occurrence in the data are omitted from the output. If \code{FALSE}, labeled values that have no observations are still printed in the table (with frequency \code{0}).} \item{statistics}{Name of the measure of association that should be computed. May be one of \code{"auto"}, \code{"cramer"}, \code{"phi"}, \code{"spearman"}, \code{"kendall"}, \code{"pearson"} or \code{"fisher"}. See \code{\link[sjstats]{xtab_statistics}}.} \item{string.total}{Character label for the total column / row header.} \item{digits}{Amount of decimals for estimates.} \item{tdcol.n}{Color for highlighting count (observed) values in table cells. Default is black.} \item{tdcol.expected}{Color for highlighting expected values in table cells. Default is cyan.} \item{tdcol.cell}{Color for highlighting cell percentage values in table cells. Default is red.} \item{tdcol.row}{Color for highlighting row percentage values in table cells. Default is blue.} \item{tdcol.col}{Color for highlighting column percentage values in table cells. Default is green.} \item{emph.total}{Logical, if \code{TRUE}, the total column and row will be emphasized with a different background color. See \code{emph.color}.} \item{emph.color}{Color value; if \code{emph.total = TRUE}, this color will be used for painting the background of the total column and row. Default is a light grey.} \item{prc.sign}{The percentage sign that is printed in the table cells, in HTML-format. Default is \code{" \%"}, i.e. there is a non-breaking space between the percentage value and the percentage sign.} \item{hundret}{Default value that indicates the 100-percent column-sums (since rounding values may lead to non-exact results). Default is \code{"100.0"}.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{encoding}{String, indicating the charset encoding used for variable and value labels.
Default is \code{NULL}, so encoding will be auto-detected depending on your platform (e.g., \code{"UTF-8"} for Unix and \code{"Windows-1252"} for Windows OS). Change encoding if specific chars are not properly displayed (e.g. German umlauts).} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer is available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parentheses for html-tags. The html-source may look less pretty, but it may help when exporting html-tables to office tools.} \item{...}{Other arguments, currently passed down to the test statistics functions \code{chisq.test()} or \code{fisher.test()}.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}) and \item the html-table with inline-css for use with knitr (\code{knitr}) } for further use. } \description{ Shows contingency tables as HTML file in browser or viewer pane, or saves them as file. } \examples{ # prepare sample data set data(efc) # print simple cross table with labels \dontrun{ if (interactive()) { tab_xtab(efc$e16sex, efc$e42dep) # print cross table with manually set # labels and expected values tab_xtab( efc$e16sex, efc$e42dep, var.labels = c("Elder's gender", "Elder's dependency"), show.exp = TRUE ) # print minimal cross table with labels, total col/row highlighted tab_xtab(efc$e16sex, efc$e42dep, show.cell.prc = FALSE, emph.total = TRUE) # User defined style sheet tab_xtab(efc$e16sex, efc$e42dep, CSS = list(css.table = "border: 2px solid;", css.tdata = "border: 1px solid;", css.horline = "border-bottom: double blue;")) # ordinal data, use Kendall's tau tab_xtab(efc$e42dep, efc$quol_5, statistics = "kendall") # calculate Spearman's rho, with continuity correction tab_xtab( efc$e42dep, efc$quol_5, statistics = "spearman", exact = FALSE, continuity = TRUE ) } } } sjPlot/man/plot_residuals.Rd0000644000176200001440000000514213567425621015631 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_residuals.R \name{plot_residuals} \alias{plot_residuals} \title{Plot predicted values and their residuals} \usage{ plot_residuals( fit, geom.size = 2, remove.estimates = NULL, show.lines = TRUE, show.resid = TRUE, show.pred = TRUE, show.ci = FALSE ) } \arguments{ \item{fit}{Fitted linear (mixed) regression model (including objects of class \code{\link[nlme]{gls}} or \code{plm}).} \item{geom.size}{size resp. width of the geoms (bar width, line thickness or point size, depending on plot type and function). Note that bar and bin widths mostly need smaller values than dot sizes.} \item{remove.estimates}{Numeric vector with indices (order equals the row index of \code{coef(fit)}) or character vector with coefficient names that indicate which estimates should be removed from the table output. The first estimate is the intercept, followed by the model predictors.
\emph{The intercept cannot be removed from the table output!} \code{remove.estimates = c(2:4)} would remove the 2nd to the 4th estimate (1st to 3rd predictor after the intercept) from the output. \code{remove.estimates = "est_name"} would remove the estimate \emph{est_name}. Default is \code{NULL}, i.e. all estimates are printed.} \item{show.lines}{Logical, if \code{TRUE}, a line connecting predicted and residual values is plotted. Set this argument to \code{FALSE}, if plot-building is too time consuming.} \item{show.resid}{Logical, if \code{TRUE}, residual values are plotted.} \item{show.pred}{Logical, if \code{TRUE}, predicted values are plotted.} \item{show.ci}{Logical, if \code{TRUE}, adds notches to the box plot, which are used to compare groups; if the notches of two boxes do not overlap, medians are considered to be significantly different.} } \value{ A ggplot-object. } \description{ This function plots observed and predicted values of the response of linear (mixed) models for each coefficient and highlights the observed values according to their distance (residuals) to the predicted values. This makes it possible to investigate how well actual and predicted values of the outcome fit across the predictor variables. } \note{ The actual (observed) values have a coloured fill, while the predicted values have a solid outline without filling. } \examples{ data(efc) # fit model fit <- lm(neg_c_7 ~ c12hour + e17age + e42dep, data = efc) # plot residuals for all independent variables plot_residuals(fit) # remove some independent variables from output plot_residuals(fit, remove.estimates = c("e17age", "e42dep")) } sjPlot/man/view_df.Rd0000644000176200001440000001256714051650703014226 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/view_df.R \name{view_df} \alias{view_df} \title{View structure of labelled data frames} \usage{ view_df( x, weight.by = NULL, alternate.rows = TRUE, show.id = TRUE, show.type = FALSE, show.values = TRUE, show.string.values = FALSE, show.labels = TRUE, show.frq = FALSE, show.prc = FALSE, show.wtd.frq = FALSE, show.wtd.prc = FALSE, show.na = FALSE, max.len = 15, sort.by.name = FALSE, wrap.labels = 50, verbose = FALSE, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE ) } \arguments{ \item{x}{A (labelled) data frame, imported by the \code{\link[sjlabelled]{read_spss}}, \code{\link[sjlabelled]{read_sas}} or \code{\link[sjlabelled]{read_stata}} function, or any similar labelled data frame (see \code{\link[sjlabelled]{set_label}} and \code{\link[sjlabelled]{set_labels}}).} \item{weight.by}{Name of a variable in \code{x} that indicates the vector of weights that will be applied to weight all observations. Default is \code{NULL}, so no weights are used.} \item{alternate.rows}{Logical, if \code{TRUE}, rows are printed in alternating colors (white and light grey by default).} \item{show.id}{Logical, if \code{TRUE} (default), the variable ID is shown in the first column.} \item{show.type}{Logical, if \code{TRUE}, adds information about the variable type to the variable column.} \item{show.values}{Logical, if \code{TRUE} (default), the variable values are shown as an additional column.} \item{show.string.values}{Logical, if \code{TRUE}, elements of character vectors are also shown.
By default, these are omitted due to possibly overly long tables.} \item{show.labels}{Logical, if \code{TRUE} (default), the value labels are shown as an additional column.} \item{show.frq}{Logical, if \code{TRUE}, an additional column with frequencies for each variable is shown.} \item{show.prc}{Logical, if \code{TRUE}, an additional column with the percentage of frequencies for each variable is shown.} \item{show.wtd.frq}{Logical, if \code{TRUE}, an additional column with weighted frequencies for each variable is shown. Weights stem from \code{weight.by}.} \item{show.wtd.prc}{Logical, if \code{TRUE}, an additional column with the weighted percentage of frequencies for each variable is shown. Weights stem from \code{weight.by}.} \item{show.na}{logical, if \code{TRUE}, \code{\link{NA}}'s (missing values) are added to the output.} \item{max.len}{Numeric, indicates how many values and value labels per variable are shown. Useful for variables with many different values, where the output can be truncated.} \item{sort.by.name}{Logical, if \code{TRUE}, rows are sorted according to the variable names. By default, rows (variables) are ordered according to their order in the data frame.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{verbose}{Logical, if \code{TRUE}, a progress bar is displayed while creating the output.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer is available, the HTML table is opened in a web browser.} \item{remove.spaces}{Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string that contains the html-data. Use this, if you want to remove parentheses for html-tags. The html-source may look less pretty, but it may help when exporting html-tables to office tools.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}) and \item the html-table with inline-css for use with knitr (\code{knitr}) } for further use. } \description{ Save (or show) the content of an imported SPSS, SAS or Stata data file, or any similar labelled \code{data.frame}, as HTML table. This quick overview shows variable ID number, name, label, type and associated value labels. The result can be considered the "codeplan" of the data frame.
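For example, a minimal sketch (assuming the \code{efc} sample data that is also used in the 'Examples' below) that extends this codeplan with frequencies and percentages:

\preformatted{
data(efc)

# add (unweighted) frequencies and percentages to the codeplan
view_df(efc, show.frq = TRUE, show.prc = TRUE)
}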
} \examples{ \dontrun{ # init dataset data(efc) # view variables view_df(efc) # view variables w/o values and value labels view_df(efc, show.values = FALSE, show.labels = FALSE) # view variables including variable types, ordered by name view_df(efc, sort.by.name = TRUE, show.type = TRUE) # User defined style sheet view_df(efc, CSS = list(css.table = "border: 2px solid;", css.tdata = "border: 1px solid;", css.arc = "color:blue;"))} } sjPlot/man/tab_model.Rd0000644000176200001440000005366414136206671014534 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/tab_model.R \name{tab_model} \alias{tab_model} \title{Print regression models as HTML table} \usage{ tab_model( ..., transform, show.intercept = TRUE, show.est = TRUE, show.ci = 0.95, show.ci50 = FALSE, show.se = NULL, show.std = NULL, show.p = TRUE, show.stat = FALSE, show.df = FALSE, show.zeroinf = TRUE, show.r2 = TRUE, show.icc = TRUE, show.re.var = TRUE, show.ngroups = TRUE, show.fstat = FALSE, show.aic = FALSE, show.aicc = FALSE, show.dev = FALSE, show.loglik = FALSE, show.obs = TRUE, show.reflvl = FALSE, terms = NULL, rm.terms = NULL, order.terms = NULL, keep = NULL, drop = NULL, title = NULL, pred.labels = NULL, dv.labels = NULL, wrap.labels = 25, bootstrap = FALSE, iterations = 1000, seed = NULL, robust = FALSE, vcov.fun = NULL, vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5", "CR0", "CR1", "CR1p", "CR1S", "CR2", "CR3"), vcov.args = NULL, string.pred = "Predictors", string.est = "Estimate", string.std = "std. Beta", string.ci = "CI", string.se = "std. Error", string.std_se = "standardized std. Error", string.std_ci = "standardized CI", string.p = "p", string.std.p = "std. p", string.df = "df", string.stat = "Statistic", string.std.stat = "std. Statistic", string.resp = "Response", string.intercept = "(Intercept)", strings = NULL, ci.hyphen = " – ", minus.sign = "-", collapse.ci = FALSE, collapse.se = FALSE, linebreak = TRUE, col.order = c("est", "se", "std.est", "std.se", "ci", "std.ci", "ci.inner", "ci.outer", "stat", "std.stat", "p", "std.p", "df.error", "response.level"), digits = 2, digits.p = 3, digits.rsq = 3, digits.re = 2, emph.p = TRUE, p.val = NULL, df.method = NULL, p.style = c("numeric", "stars", "numeric_stars", "scientific", "scientific_stars"), p.threshold = c(0.05, 0.01, 0.001), p.adjust = NULL, case = "parsed", auto.label = TRUE, prefix.labels = c("none", "varname", "label"), bpe = "median", CSS = css_theme("regression"), file = NULL, use.viewer = TRUE, encoding = "UTF-8" ) } \arguments{ \item{...}{One or more regression models, including glm's or mixed models. May also be a \code{list} with fitted models. See 'Examples'.} \item{transform}{A character vector, naming a function that will be applied to estimates and confidence intervals. By default, \code{transform} will automatically use \code{"exp"} as transformation for applicable classes of \code{model} (e.g. logistic or poisson regression). Estimates of linear models remain untransformed. Use \code{NULL} if you want the raw, non-transformed estimates.} \item{show.intercept}{Logical, if \code{TRUE}, the intercepts are printed.} \item{show.est}{Logical, if \code{TRUE}, the estimates are printed.} \item{show.ci}{Either logical, and if \code{TRUE}, the confidence intervals are printed to the table; if \code{FALSE}, confidence intervals are omitted.
Or numeric, between 0 and 1, indicating the range of the confidence intervals.} \item{show.ci50}{Logical, if \code{TRUE}, for Bayesian models, a second credible interval is added to the table output.} \item{show.se}{Logical, if \code{TRUE}, the standard errors are also printed. If robust standard errors are required, use arguments \code{vcov.fun}, \code{vcov.type} and \code{vcov.args} (see \code{\link[parameters]{standard_error_robust}} and \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} for details).} \item{show.std}{Indicates whether standardized beta-coefficients should also printed, and if yes, which type of standardization is done. See 'Details'.} \item{show.p}{Logical, if \code{TRUE}, p-values are also printed.} \item{show.stat}{Logical, if \code{TRUE}, the coefficients' test statistic is also printed.} \item{show.df}{Logical, if \code{TRUE} and \code{p.val = "kr"}, the p-values for linear mixed models are based on df with Kenward-Rogers approximation. These df-values are printed. See \code{\link[parameters]{p_value}} for details.} \item{show.zeroinf}{Logical, if \code{TRUE} and model has a zero-inflated model part, this is also printed to the table.} \item{show.r2}{Logical, if \code{TRUE}, the r-squared value is also printed. Depending on the model, these might be pseudo-r-squared values, or Bayesian r-squared etc. See \code{\link[performance]{r2}} for details.} \item{show.icc}{Logical, if \code{TRUE}, prints the intraclass correlation coefficient for mixed models. See \code{\link[performance]{icc}} for details.} \item{show.re.var}{Logical, if \code{TRUE}, prints the random effect variances for mixed models. See \code{\link[insight]{get_variance}} for details.} \item{show.ngroups}{Logical, if \code{TRUE}, shows number of random effects groups for mixed models.} \item{show.fstat}{Logical, if \code{TRUE}, the F-statistics for each model is printed in the table summary. This option is not supported by all model types.} \item{show.aic}{Logical, if \code{TRUE}, the AIC value for each model is printed in the table summary.} \item{show.aicc}{Logical, if \code{TRUE}, the second-order AIC value for each model is printed in the table summary.} \item{show.dev}{Logical, if \code{TRUE}, shows the deviance of the model.} \item{show.loglik}{Logical, if \code{TRUE}, shows the log-Likelihood of the model.} \item{show.obs}{Logical, if \code{TRUE}, the number of observations per model is printed in the table summary.} \item{show.reflvl}{Logical, if \code{TRUE}, an additional row is inserted to the table before each predictor of type \code{\link{factor}}, which will indicate the reference level of the related factor.} \item{terms}{Character vector with names of those terms (variables) that should be printed in the table. All other terms are removed from the output. If \code{NULL}, all terms are printed. Note that the term names must match the names of the model's coefficients. For factors, this means that the variable name is suffixed with the related factor level, and each category counts as one term. E.g. \code{rm.terms = "t_name [2,3]"} would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} is categorical and has at least the factor levels \code{2} and \code{3}). 
Another example for the \emph{iris}-dataset: \code{terms = "Species"} would not work, instead use \code{terms = "Species [versicolor,virginica]"}.} \item{rm.terms}{Character vector with names that indicate which terms should be removed from the output. Counterpart to \code{terms}. \code{rm.terms = "t_name"} would remove the term \emph{t_name}. Default is \code{NULL}, i.e. all terms are used. For factors, levels that should be removed from the output need to be explicitly indicated in square brackets, and match the model's coefficient names, e.g. \code{rm.terms = "t_name [2,3]"} would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} was categorical and has at least the factor levels \code{2} and \code{3}).} \item{order.terms}{Numeric vector, indicating in which order the coefficients should be plotted. See examples in \href{https://strengejacke.github.io/sjPlot/articles/plot_model_estimates.html}{this package-vignette}.} \item{keep, drop}{Character containing a regular expression pattern that describes the parameters that should be included (for \code{keep}) or excluded (for \code{drop}) in the returned data frame. \code{keep} may also be a named list of regular expressions. All non-matching parameters will be removed from the output. If \code{keep} has more than one element, these will be merged with an \code{OR} operator into a regular expression pattern like this: \code{"(one|two|three)"}. See further details in \code{?parameters::model_parameters}.} \item{title}{String, will be used as table caption.} \item{pred.labels}{Character vector with labels of predictor variables. If not \code{NULL}, \code{pred.labels} will be used in the first table column with the predictors' names. By default, if \code{auto.label = TRUE} and \href{https://strengejacke.github.io/sjlabelled/articles/intro_sjlabelled.html}{data is labelled}, \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels of the coefficients, which will be used as predictor labels. If data is not labelled, \href{https://easystats.github.io/parameters/reference/format_parameters.html}{format_parameters()} is used to create pretty labels. If \code{pred.labels = ""} or \code{auto.label = FALSE}, the raw variable names as used in the model formula are used as predictor labels. If \code{pred.labels} is a named vector, predictor labels (by default, the names of the model's coefficients) will be matched with the names of \code{pred.labels}. This ensures that labels always match the related predictor in the table, no matter in which way the predictors are sorted. See 'Examples'.} \item{dv.labels}{Character vector with labels of dependent variables of all fitted models. If \code{dv.labels = ""}, the row with the names of dependent variables is omitted from the table.} \item{wrap.labels}{Numeric, determines how many chars of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{bootstrap}{Logical, if \code{TRUE}, returns bootstrapped estimates.} \item{iterations}{Numeric, number of bootstrap iterations (default is 1000).} \item{seed}{Numeric, the number of the seed to replicate bootstrapped estimates. If \code{NULL}, uses a random seed.} \item{robust}{Logical, shortcut for arguments \code{vcov.fun} and \code{vcov.type}.
If \code{TRUE}, uses \code{vcov.fun = "vcovHC"} and \code{vcov.type = "HC3"} as default, that is, \code{\link[sandwich]{vcovHC}} with default-type is called (see \code{\link[parameters]{standard_error_robust}} and \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} for further details).} \item{vcov.fun}{Character vector, indicating the name of the \code{vcov*()}-function from the \pkg{sandwich} or \pkg{clubSandwich} package, e.g. \code{vcov.fun = "vcovCL"}, if robust standard errors are required.} \item{vcov.type}{Character vector, specifying the estimation type for the robust covariance matrix estimation (see \code{\link[sandwich:vcovHC]{vcovHC()}} or \code{clubSandwich::vcovCR()} for details).} \item{vcov.args}{List of named vectors, used as additional arguments that are passed down to \code{vcov.fun}.} \item{string.pred}{Character vector, used as headline for the predictor column. Default is \code{"Predictors"}.} \item{string.est}{Character vector, used for the column heading of coefficients. Default is based on the response scale, e.g. for logistic regression models, \code{"Odds Ratios"} will be chosen, while for Poisson models it is \code{"Incidence Rate Ratios"} etc. Default if not specified is \code{"Estimate"}.} \item{string.std}{Character vector, used for the column heading of standardized beta coefficients. Default is \code{"std. Beta"}.} \item{string.ci}{Character vector, used for the column heading of confidence interval values. Default is \code{"CI"}.} \item{string.se}{Character vector, used for the column heading of standard error values. Default is \code{"std. Error"}.} \item{string.std_se}{Character vector, used for the column heading of standard errors of standardized coefficients. Default is \code{"standardized std. Error"}.} \item{string.std_ci}{Character vector, used for the column heading of confidence intervals of standardized coefficients. Default is \code{"standardized CI"}.} \item{string.p}{Character vector, used for the column heading of p values. Default is \code{"p"}.} \item{string.std.p}{Character vector, used for the column heading of standardized p values. Default is \code{"std. p"}.} \item{string.df}{Character vector, used for the column heading of degrees of freedom. Default is \code{"df"}.} \item{string.stat}{Character vector, used for the test statistic. Default is \code{"Statistic"}.} \item{string.std.stat}{Character vector, used for the standardized test statistic. Default is \code{"std. Statistic"}.} \item{string.resp}{Character vector, used for the column heading of the response level for multinomial or categorical models. Default is \code{"Response"}.} \item{string.intercept}{Character vector, used as the name for the intercept parameter. Default is \code{"(Intercept)"}.} \item{strings}{Named character vector, as an alternative to arguments like \code{string.ci} or \code{string.p} etc. The name (lhs) must be one of the string-indicators from the aforementioned arguments, while the value (rhs) is the string that is used as column heading. E.g., \code{strings = c(ci = "Conf.Int.", se = "std. Err")} would be equivalent to setting \code{string.ci = "Conf.Int.", string.se = "std. Err"}.} \item{ci.hyphen}{Character vector, indicating the hyphen for the confidence interval range. May be an HTML entity. See 'Examples'.} \item{minus.sign}{String, indicating the minus sign for negative numbers. May be an HTML entity.
See 'Examples'.} \item{collapse.ci}{Logical, if \code{FALSE}, the CI values are shown in a separate table column.} \item{collapse.se}{Logical, if \code{FALSE}, the SE values are shown in a separate table column.} \item{linebreak}{Logical, if \code{TRUE} and \code{collapse.ci = FALSE} or \code{collapse.se = FALSE}, inserts a line break between estimate and CI resp. SE values. If \code{FALSE}, values are printed in the same line as estimate values.} \item{col.order}{Character vector, indicating which columns should be printed and in which order. Column names that are excluded from \code{col.order} are not shown in the table output. However, column names that are included are only shown in the table when the related argument (like \code{show.est} for \code{"estimate"}) is set to \code{TRUE} or another valid value. Table columns are printed in the order in which they appear in \code{col.order}.} \item{digits}{Amount of decimals for estimates.} \item{digits.p}{Amount of decimals for p-values.} \item{digits.rsq}{Amount of decimals for r-squared values.} \item{digits.re}{Amount of decimals for the random effects part of the summary table.} \item{emph.p}{Logical, if \code{TRUE}, significant p-values are shown in bold face.} \item{df.method, p.val}{Method for computing degrees of freedom for p-values, standard errors and confidence intervals (CI). Only applies to mixed models. Use \code{df.method = "wald"} for a faster, but less precise computation. This will use the residual degrees of freedom (as returned by \code{df.residual()}) for linear mixed models, and \code{Inf} degrees of freedom for all other model families. \code{df.method = "kenward"} (or \code{df.method = "kr"}) uses the Kenward-Roger approximation for the degrees of freedom. \code{df.method = "satterthwaite"} uses Satterthwaite's approximation and \code{"ml1"} uses a "m-l-1" heuristic (see \code{\link[parameters]{degrees_of_freedom}} for details). Use \code{show.df = TRUE} to show the approximated degrees of freedom for each coefficient.} \item{p.style}{Character, indicating if p-values should be printed as numeric value (\code{"numeric"}), as 'stars' (asterisks) only (\code{"stars"}), or scientific (\code{"scientific"}). Scientific and numeric style can be combined with "stars", e.g. \code{"numeric_stars"}.} \item{p.threshold}{Numeric vector of length 3, indicating the threshold for annotating p-values with asterisks. Only applies if \code{p.style = "asterisk"}.} \item{p.adjust}{Character vector, if not \code{NULL}, indicates the method to adjust p-values. See \code{\link[stats]{p.adjust}} for details.} \item{case}{Desired target case. Labels will automatically be converted into the specified character case. See \code{snakecase::to_any_case()} for more details on this argument. By default, if \code{case} is not specified, it will be set to \code{"parsed"}, unless \code{prefix.labels} is not \code{"none"}. If \code{prefix.labels} is either \code{"label"} (or \code{"l"}) or \code{"varname"} (or \code{"v"}) and \code{case} is not specified, it will be set to \code{NULL} - this is a more convenient default when prefixing labels.} \item{auto.label}{Logical, if \code{TRUE} (the default), and \href{https://strengejacke.github.io/sjlabelled/articles/intro_sjlabelled.html}{data is labelled}, \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels of the coefficients, which will be used as predictor labels.
If data is not labelled, \href{https://easystats.github.io/parameters/reference/format_parameters.html}{format_parameters()} is used to create pretty labels. If \code{auto.label = FALSE}, original variable names and value labels (factor levels) are used.} \item{prefix.labels}{Indicates whether the value labels of categorical variables should be prefixed, e.g. with the variable name or variable label. See argument \code{prefix} in \code{\link[sjlabelled]{term_labels}} for details.} \item{bpe}{For \strong{Stan}-models (fitted with the \pkg{rstanarm}- or \pkg{brms}-package), the Bayesian point estimate is, by default, the median of the posterior distribution. Use \code{bpe} to define other functions to calculate the Bayesian point estimate. \code{bpe} needs to be a character naming the specific function, which is passed to the \code{fun}-argument in \code{\link[sjmisc]{typical_value}}. So, \code{bpe = "mean"} would calculate the mean value of the posterior distribution.} \item{CSS}{A \code{\link{list}} with user-defined style-sheet-definitions, according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}.} \item{file}{Destination file, if the output should be saved as file. If \code{NULL} (default), the output will be saved as temporary file and opened either in the IDE's viewer pane or the default web browser.} \item{use.viewer}{Logical, if \code{TRUE}, the HTML table is shown in the IDE's viewer pane. If \code{FALSE} or no viewer available, the HTML table is opened in a web browser.} \item{encoding}{Character vector, indicating the charset encoding used for variable and value labels. Default is \code{"UTF-8"}. For Windows Systems, \code{encoding = "Windows-1252"} might be necessary for proper display of special characters.} } \value{ Invisibly returns \itemize{ \item the web page style sheet (\code{page.style}), \item the web page content (\code{page.content}), \item the complete html-output (\code{page.complete}) and \item the html-table with inline-css for use with knitr (\code{knitr}) } for further use. } \description{ \code{tab_model()} creates HTML tables from regression models. } \details{ \subsection{Standardized Estimates}{ Default standardization is done by completely refitting the model on the standardized data. Hence, this approach is equal to standardizing the variables before fitting the model, which is particularly recommended for complex models that include interactions or transformations (e.g., polynomial or spline terms). When \code{show.std = "std2"}, standardization of estimates follows \href{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf}{Gelman's (2008)} suggestion, rescaling the estimates by dividing them by two standard deviations instead of just one. Resulting coefficients are then directly comparable for untransformed binary predictors. For backward compatibility reasons, \code{show.std} also may be a logical value; if \code{TRUE}, normal standardized estimates are printed (same effect as \code{show.std = "std"}). Use \code{show.std = NULL} (default) or \code{show.std = FALSE}, if no standardization is required. } \subsection{How do I use \code{CSS}-argument?}{ With the \code{CSS}-argument, the visual appearance of the tables can be modified. To get an overview of all style-sheet-classnames that are used in this function, see return value \code{page.style} for details. 
Arguments for this list have following syntax: \enumerate{ \item the class-names with \code{"css."}-prefix as argument name and \item each style-definition must end with a semicolon } You can add style information to the default styles by using a + (plus-sign) as initial character for the argument attributes. Examples: \itemize{ \item \code{css.table = 'border:2px solid red;'} for a solid 2-pixel table border in red. \item \code{css.summary = 'font-weight:bold;'} for a bold fontweight in the summary row. \item \code{css.lasttablerow = 'border-bottom: 1px dotted blue;'} for a blue dotted border of the last table row. \item \code{css.colnames = '+color:green'} to add green color formatting to column names. \item \code{css.arc = 'color:blue;'} for a blue text color each 2nd row. \item \code{css.caption = '+color:red;'} to add red font-color to the default table caption style. } } } \note{ The HTML tables can either be saved as file and manually opened (use argument \code{file}) or they can be saved as temporary files and will be displayed in the RStudio Viewer pane (if working with RStudio) or opened with the default web browser. Displaying resp. opening a temporary file is the default behaviour (i.e. \code{file = NULL}). \cr \cr Examples are shown in these three vignettes: \href{https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html}{Summary of Regression Models as HTML Table}, \href{https://strengejacke.github.io/sjPlot/articles/tab_mixed.html}{Summary of Mixed Models as HTML Table} and \href{https://strengejacke.github.io/sjPlot/articles/tab_bayes.html}{Summary of Bayesian Models as HTML Table}. } sjPlot/man/sjp.corr.Rd0000644000176200001440000000732613662304072014342 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotCorr.R \name{sjp.corr} \alias{sjp.corr} \title{Plot correlation matrix} \usage{ sjp.corr( data, title = NULL, axis.labels = NULL, sort.corr = TRUE, decimals = 3, na.deletion = c("listwise", "pairwise"), corr.method = c("pearson", "spearman", "kendall"), geom.colors = "RdBu", wrap.title = 50, wrap.labels = 20, show.legend = FALSE, legend.title = NULL, show.values = TRUE, show.p = TRUE, p.numeric = FALSE ) } \arguments{ \item{data}{Matrix with correlation coefficients as returned by the \code{\link{cor}}-function, or a \code{data.frame} of variables where correlations between columns should be computed.} \item{title}{character vector, used as plot title. Depending on plot type and function, will be set automatically. If \code{title = ""}, no title is printed. For effect-plots, may also be a character vector of length > 1, to define titles for each sub-plot or facet.} \item{axis.labels}{character vector with labels used as axis labels. Optional argument, since in most cases, axis labels are set automatically.} \item{sort.corr}{Logical, if \code{TRUE} (default), the axis labels are sorted according to the correlation strength. If \code{FALSE}, axis labels appear in order of how variables were included in the cor-computation or data frame.} \item{decimals}{Indicates how many decimal values after comma are printed when the values labels are shown. Default is 3. Only applies when \code{show.values = TRUE}.} \item{na.deletion}{Indicates how missing values are treated. May be either \code{"listwise"} (default) or \code{"pairwise"}. May be abbreviated.} \item{corr.method}{Indicates the correlation computation method. May be one of \code{"pearson"} (default), \code{"spearman"} or \code{"kendall"}. 
May be abbreviated.} \item{geom.colors}{User-defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{wrap.title}{numeric, determines how many characters of the plot title are displayed in one line and when a line break is inserted.} \item{wrap.labels}{numeric, determines how many characters of the value, variable or axis labels are displayed in one line and when a line break is inserted.} \item{show.legend}{logical, if \code{TRUE}, and depending on plot type and function, a legend is added to the plot.} \item{legend.title}{character vector, used as title for the plot legend.} \item{show.values}{Logical, whether values should be plotted or not.} \item{show.p}{Logical, adds significance levels to values, or value and variable labels.} \item{p.numeric}{Logical, if \code{TRUE}, the p-values are printed as numbers. If \code{FALSE} (default), asterisks are used.} } \value{ (Invisibly) returns the ggplot-object with the complete plot (\code{plot}) as well as the data frame that was used for setting up the ggplot-object (\code{df}) and the original correlation matrix (\code{corr.matrix}). } \description{ Plot a correlation matrix as ellipses or tiles. } \details{ The required argument is either a \code{\link{data.frame}} or a matrix with correlation coefficients as returned by the \code{\link{cor}}-function. In case of ellipses, the size of the ellipses indicates the strength of the correlation. Furthermore, blue and red colors indicate positive or negative correlations, where stronger correlations are darker. } \note{ If \code{data} is a matrix with correlation coefficients as returned by the \code{\link{cor}}-function, p-values can't be computed. Thus, \code{show.p} and \code{p.numeric} only have an effect if \code{data} is a \code{\link{data.frame}}. } sjPlot/man/dist_chisq.Rd0000644000176200001440000000507013567425621014736 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/sjPlotDist.R \name{dist_chisq} \alias{dist_chisq} \title{Plot chi-squared distributions} \usage{ dist_chisq( chi2 = NULL, deg.f = NULL, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7 ) } \arguments{ \item{chi2}{Numeric, optional. If specified, a chi-squared distribution with \code{deg.f} degrees of freedom is plotted, and a shaded area at the position of the \code{chi2} value indicates whether the specified value is significant or not. If neither \code{chi2} nor \code{p} is specified, a distribution without shaded area is plotted.} \item{deg.f}{Numeric. The degrees of freedom for the chi-squared distribution. Needs to be specified.} \item{p}{Numeric, optional. If specified, a chi-squared distribution with \code{deg.f} degrees of freedom is plotted, and a shaded area is plotted at the position where the specified p-level starts. If neither \code{chi2} nor \code{p} is specified, a distribution without shaded area is plotted.} \item{xmax}{Numeric, optional. Specifies the maximum x-axis value. If not specified, the x-axis ranges to a value where a p-level of 0.00001 is reached.} \item{geom.colors}{User-defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}.} \item{geom.alpha}{Specifies the alpha-level of the shaded area. Default is 0.7, ranging from 0 to 1.} } \description{ This function plots a simple chi-squared distribution or a chi-squared distribution with shaded areas that indicate at which chi-squared value a significant p-level is reached.
} \examples{ # a simple chi-squared distribution # for 6 degrees of freedom dist_chisq(deg.f = 6) # a chi-squared distribution for 6 degrees of freedom, # and a shaded area starting at chi-squared value of ten. # With a df of 6, a chi-squared value of 12.59 would be "significant", # thus the shaded area from 10 to 12.58 is filled as "non-significant", # while the area starting from chi-squared value 12.59 is filled as # "significant" dist_chisq(chi2 = 10, deg.f = 6) # a chi-squared distribution for 6 degrees of freedom, # and a shaded area starting at that chi-squared value, which has # a p-level of about 0.125 (which equals a chi-squared value of about 10). # With a df of 6, a chi-squared value of 12.59 would be "significant", # thus the shaded area from 10 to 12.58 (p-level 0.125 to p-level 0.05) # is filled as "non-significant", while the area starting from chi-squared # value 12.59 (p-level < 0.05) is filled as "significant". dist_chisq(p = 0.125, deg.f = 6) } sjPlot/man/plot_grid.Rd0000644000176200001440000000341413624274771014571 0ustar liggesusers% Generated by roxygen2: do not edit by hand % Please edit documentation in R/plot_grid.R \name{plot_grid} \alias{plot_grid} \title{Arrange list of plots as grid} \usage{ plot_grid(x, margin = c(1, 1, 1, 1), tags = NULL) } \arguments{ \item{x}{A list of ggplot-objects. See 'Details'.} \item{margin}{A numeric vector of length 4, indicating the top, right, bottom and left margin for each plot, in centimetres.} \item{tags}{Add tags to your subfigures. Can be \code{TRUE} (letter tags) or character vector containing tags labels.} } \value{ An object of class \code{gtable}. } \description{ Plot multiple ggplot-objects as a grid-arranged single plot. } \details{ This function takes a \code{list} of ggplot-objects as argument. Plotting functions of this package that produce multiple plot objects (e.g., when there is an argument \code{facet.grid}) usually return multiple plots as list (the return value is named \code{plot.list}). To arrange these plots as grid as a single plot, use \code{plot_grid}. 
} \examples{ if (require("dplyr") && require("gridExtra")) { library(ggeffects) data(efc) # fit model fit <- glm( tot_sc_e ~ c12hour + e17age + e42dep + neg_c_7, data = efc, family = poisson ) # plot marginal effects for each predictor, each as single plot p1 <- ggpredict(fit, "c12hour") \%>\% plot(show.y.title = FALSE, show.title = FALSE) p2 <- ggpredict(fit, "e17age") \%>\% plot(show.y.title = FALSE, show.title = FALSE) p3 <- ggpredict(fit, "e42dep") \%>\% plot(show.y.title = FALSE, show.title = FALSE) p4 <- ggpredict(fit, "neg_c_7") \%>\% plot(show.y.title = FALSE, show.title = FALSE) # plot grid plot_grid(list(p1, p2, p3, p4)) # plot grid plot_grid(list(p1, p2, p3, p4), tags = TRUE) } } sjPlot/DESCRIPTION0000644000176200001440000000506414150203412013230 0ustar liggesusersPackage: sjPlot Type: Package Encoding: UTF-8 Title: Data Visualization for Statistics in Social Science Version: 2.8.10 Authors@R: c( person("Daniel", "Lüdecke", email = "d.luedecke@uke.de", role = c("aut", "cre"), comment = c(ORCID = "0000-0002-8895-3206")), person("Alexander", "Bartel", role = "ctb", comment = c(ORCID = "0000-0002-1280-6138")), person("Carsten", "Schwemmer", email = "carsten.schwemmer@uni-bamberg.de", role = "ctb"), person(given = "Chuck", family = "Powell", role = "ctb", email = "ibecav@gmail.com", comment = c(ORCID = "0000-0002-3606-2188")), person(given = "Amir", family = "Djalovski", role = "ctb", email = "Amir.DJV@gmail.com"), person(given = "Johannes", family = "Titz", role = "ctb", email = "johannes@titz.science", comment = c(ORCID = "0000-0002-1102-5719"))) Maintainer: Daniel Lüdecke Description: Collection of plotting and table output functions for data visualization. Results of various statistical analyses (that are commonly used in social sciences) can be visualized using this package, including simple and cross tabulated frequencies, histograms, box plots, (generalized) linear models, mixed effects models, principal component analysis and correlation matrices, cluster analyses, scatter plots, stacked scales, effects plots of regression models (including interaction terms) and much more. This package supports labelled data. License: GPL-3 Depends: R (>= 3.6) Imports: graphics, grDevices, stats, utils, bayestestR, datawizard, dplyr, effectsize, ggeffects, ggplot2 (>= 3.2.0), knitr, insight, MASS, parameters, performance, purrr, rlang, scales, sjlabelled (>= 1.1.2), sjmisc (>= 2.8.2), sjstats (>= 0.17.8), tidyr (>= 1.0.0) Suggests: brms, car, clubSandwich, cluster, cowplot, haven, GPArotation, ggrepel, glmmTMB, gridExtra, ggridges, httr, lme4, nFactors, pscl, psych, rmarkdown, rstanarm, sandwich, splines, survey, TMB, testthat URL: https://strengejacke.github.io/sjPlot/ BugReports: https://github.com/strengejacke/sjPlot/issues RoxygenNote: 7.1.2 VignetteBuilder: knitr NeedsCompilation: no Packaged: 2021-11-26 10:14:20 UTC; Daniel Author: Daniel Lüdecke [aut, cre] (), Alexander Bartel [ctb] (), Carsten Schwemmer [ctb], Chuck Powell [ctb] (), Amir Djalovski [ctb], Johannes Titz [ctb] () Repository: CRAN Date/Publication: 2021-11-26 16:10:50 UTC sjPlot/build/0000755000176200001440000000000014150131571012622 5ustar liggesuserssjPlot/build/vignette.rds0000644000176200001440000000120414150131571015156 0ustar liggesusersTKo1Ydlˌ5hL֡<U'DuBNMyԥ! 
$&C'W6t&=qv** ȝ!rl׹(&}T6BD8VF50uKoA̔3=DLU麢qdnY:ueg YjYǕʳҳ[항,Q,ȯROY+FZAf+Ƙ)jvGXKpsg|i5l].5nز2% WeyiUӥ$8Ŷ͵3"_4F[3d= KFϏ?:e.nulvOOGc+CLsjPlot/tests/0000755000176200001440000000000013446531455012701 5ustar liggesuserssjPlot/tests/testthat/0000755000176200001440000000000014150203412014517 5ustar liggesuserssjPlot/tests/testthat/test-plot_model_std.R0000644000176200001440000000575514147735034020663 0ustar liggesusers.runThisTest <- Sys.getenv("RunAllsjPlotTests") == "yes" if (suppressWarnings( require("testthat") && require("sjPlot") && require("sjmisc") && require("sjlabelled") && require("haven") && require("lme4") )) { context("sjPlot, tab_model type std") data(sleepstudy) data(iris) data(efc) efc <- to_factor(efc, e42dep, c172code, c161sex) m1 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy, REML = F) m2 <- lmer(Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species), data = iris) m3 <- lm(neg_c_7 ~ e42dep + barthtot + c161sex, data = efc) test_that("plot_model", { p <- plot_model(m1) p <- plot_model(m2) p <- plot_model(m3) p <- plot_model(m1, type = "slope") p <- plot_model(m2, type = "slope") p <- plot_model(m3, type = "slope") p <- plot_model(m1, type = "resid") p <- plot_model(m2, type = "resid") p <- plot_model(m3, type = "resid") }) test_that("plot_model, std", { p <- plot_model(m1, type = "std") p <- plot_model(m1, type = "std2") p <- plot_model(m2, type = "std") p <- plot_model(m2, type = "std2") p <- plot_model(m3, type = "std") p <- plot_model(m3, type = "std2") }) if (.runThisTest) { if (suppressWarnings( require("testthat") && require("rstanarm") && require("sjPlot") && require("lme4") )) { # fit linear model data(sleepstudy) sleepstudy$age <- round(runif(nrow(sleepstudy), min = 20, max = 60)) sleepstudy$Rdicho <- dicho(sleepstudy$Reaction) m1 <- stan_glmer( Reaction ~ Days + age + (1 | Subject), data = sleepstudy, QR = TRUE, # this next line is only to keep the example small in size! 
chains = 2, cores = 1, seed = 12345, iter = 500 ) m2 <- stan_glmer( Rdicho ~ Days + age + (1 | Subject), data = sleepstudy, QR = TRUE, family = binomial, chains = 2, iter = 500 ) test_that("plot_model, rstan", { p <- plot_model(m1) p <- plot_model(m2) p <- plot_model(m1, bpe = "mean") p <- plot_model(m2, bpe = "mean") p <- plot_model(m1, bpe = "mean", bpe.style = "dot") p <- plot_model(m2, bpe = "mean", bpe.style = "dot") p <- plot_model(m1, bpe = "mean", bpe.style = "line", bpe.color = "green") p <- plot_model(m2, bpe = "mean", bpe.style = "line", bpe.color = "green") p <- plot_model(m1, bpe = "mean", bpe.style = "line", bpe.color = "green", prob.inner = .4, prob.outer = .8) p <- plot_model(m2, bpe = "mean", bpe.style = "line", bpe.color = "green", prob.inner = .4, prob.outer = .8) p <- plot_model(m1, bpe = "mean", bpe.style = "line", bpe.color = "green", prob.inner = .4, prob.outer = .8, size.inner = .5) p <- plot_model(m2, bpe = "mean", bpe.style = "line", bpe.color = "green", prob.inner = .4, prob.outer = .8, size.inner = .5) }) } } } sjPlot/tests/testthat/test-tab_model.R0000644000176200001440000000624014147735034017567 0ustar liggesusers.runThisTest <- Sys.getenv("RunAllsjPlotTests") == "yes" if (suppressWarnings( require("testthat") && require("sjPlot") && require("sjlabelled") && require("haven") && require("sjmisc") && require("lme4") && require("glmmTMB") && interactive() )) { data(sleepstudy) data(Salamanders) data(iris) data(efc) efc <- to_factor(efc, e42dep, c172code, c161sex) m1 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy, REML = F) m2 <- lmer(Sepal.Length ~ Sepal.Width + Petal.Length + (1 | Species), data = iris) m3 <- lm(neg_c_7 ~ e42dep + barthtot + c161sex, data = efc) m4 <- glmmTMB( count ~ spp + mined + (1 | site), ziformula = ~ spp + mined, family = truncated_nbinom2, Salamanders ) test_that("tab_model", { p <- tab_model(m1, m2, m3) }) test_that("tab_model", { tab_model(m1, m2, m3, m4) }) test_that("tab_model, check shows", { p <- tab_model(m1, m2, m3, show.intercept = FALSE, show.fstat = TRUE, show.se = TRUE) p <- tab_model(m1, m2, m3, show.intercept = FALSE, show.fstat = TRUE, show.se = TRUE, show.ci = F, show.df = TRUE, p.val = "kr") }) test_that("tab_model, check terms", { p <- tab_model(m1, m2, m3, show.intercept = FALSE, show.fstat = TRUE, show.se = TRUE, terms = c("Days", "Sepal.Width", "c161sex2", "barthtot")) p <- tab_model(m1, m2, m3, show.intercept = FALSE, show.fstat = TRUE, show.se = TRUE, show.ci = F, show.df = TRUE, p.val = "kr", rm.terms = c("Days", "Sepal.Width", "c161sex2", "barthtot")) p <- tab_model(m1, m2, m3, show.intercept = FALSE, show.fstat = TRUE, show.se = TRUE, show.ci = F, show.df = TRUE, rm.terms = c("Sepal.Width", "c161sex2", "barthtot")) }) test_that("tab_model, std", { p <- tab_model(m1, show.std = "std") p <- tab_model(m1, show.std = "std2") p <- tab_model(m2, show.std = "std") p <- tab_model(m2, show.std = "std2") p <- tab_model(m1, m2, show.std = "std") p <- tab_model(m1, m2, m3, show.std = "std") p <- tab_model(m1, m2, m3, show.std = "std2") }) if (.runThisTest) { if (suppressWarnings( require("testthat") && require("rstanarm") && require("sjPlot") && require("lme4") )) { # fit linear model data(sleepstudy) sleepstudy$age <- round(runif(nrow(sleepstudy), min = 20, max = 60)) sleepstudy$Rdicho <- dicho(sleepstudy$Reaction) m1 <- stan_glmer( Reaction ~ Days + age + (1 | Subject), data = sleepstudy, QR = TRUE, # this next line is only to keep the example small in size! 
chains = 2, cores = 1, seed = 12345, iter = 500 ) m2 <- stan_glmer( Rdicho ~ Days + age + (1 | Subject), data = sleepstudy, QR = TRUE, family = binomial, chains = 2, iter = 500 ) test_that("tab_model, rstan", { p <- tab_model(m1) p <- tab_model(m2) p <- tab_model(m1, m2) p <- tab_model(m1, m2, show.ci50 = FALSE) p <- tab_model(m1, m2, col.order = c("ci.outer", "ci.inner", "est")) p <- tab_model(m1, m2, bpe = "mean") }) } } } sjPlot/tests/testthat/test-plot_grpfrq.R0000644000176200001440000000660114147735034020201 0ustar liggesusersif (suppressWarnings( require("testthat") && require("sjlabelled") && require("haven") && require("sjPlot") )) { # glm, logistic regression ---- data(efc) efc$gewicht <- rnorm(nrow(efc), 1, .2) test_that("plot_grpfrq", { p <- plot_grpfrq(efc$e17age, efc$e16sex) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "dot") p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "line") p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "boxplot") p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "violin") p <- plot_grpfrq(efc$e17age, efc$e16sex, bar.pos = "stack") p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "boxplot", intr.var = efc$c172code) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "violin", intr.var = efc$c172code) p <- plot_grpfrq(efc$e17age, efc$e16sex, show.values = FALSE) p <- plot_grpfrq(efc$e17age, efc$e16sex, show.values = FALSE, show.n = TRUE) p <- plot_grpfrq(efc$e17age, efc$e16sex, show.values = TRUE, show.n = TRUE) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "dot", show.values = TRUE, show.n = TRUE) p <- plot_grpfrq(efc$e17age, efc$e16sex, show.values = TRUE, show.n = TRUE, show.prc = TRUE) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "dot", show.values = TRUE, show.n = TRUE, show.prc = TRUE) p <- plot_grpfrq(efc$e17age, efc$e16sex, show.grpcnt = TRUE) expect_message(p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "boxplot", show.grpcnt = TRUE)) expect_message(p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "boxplot", intr.var = efc$c172code, show.grpcnt = TRUE)) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "bar", show.grpcnt = TRUE) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "dot") p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "dot", show.ci = T) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "line", show.ci = T) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "boxplot", show.ci = T) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "violin", show.ci = T) p <- plot_grpfrq(efc$e42dep, efc$e16sex, weight.by = efc$gewicht) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "dot", weight.by = efc$gewicht) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "line", weight.by = efc$gewicht) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "boxplot", weight.by = efc$gewicht) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "violin", weight.by = efc$gewicht) p <- plot_grpfrq(efc$e42dep, efc$e16sex, legend.title = "Geschlecht", legend.labels = c("M", "W"), axis.titles = "Dependency", axis.labels = c("gar nicht", "leicht", "mittel", "schwer")) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "dot", legend.title = "Geschlecht", legend.labels = c("M", "W"), axis.titles = "Dependency", axis.labels = c("gar nicht", "leicht", "mittel", "schwer")) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "line", legend.title = "Geschlecht", legend.labels = c("M", "W"), axis.titles = "Dependency", axis.labels = c("gar nicht", "leicht", "mittel", "schwer")) p <- plot_grpfrq(efc$e42dep, efc$e16sex, type = "boxplot", legend.title = "Geschlecht", legend.labels = c("M", "W"), 
axis.titles = "Dependency", axis.labels = c("A", "B")) p <- plot_grpfrq(efc$e17age, efc$e16sex, type = "violin", legend.title = "Geschlecht", legend.labels = c("M", "W"), axis.titles = "Dependency", axis.labels = c("A", "B")) }) } sjPlot/tests/testthat.R0000644000176200001440000000027313446531455014666 0ustar liggesuserslibrary(testthat) library(sjPlot) if (length(strsplit(packageDescription("sjPlot")$Version, "\\.")[[1]]) > 3) { Sys.setenv("RunAllsjPlotTests" = "yes") } test_check("sjPlot") sjPlot/vignettes/0000755000176200001440000000000014150131571013533 5ustar liggesuserssjPlot/vignettes/tab_mixed.Rmd0000644000176200001440000000702613612122336016141 0ustar liggesusers--- title: "Summary of Mixed Models as HTML Table" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("lme4", quietly = TRUE) || !requireNamespace("glmmTMB", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` This vignette shows examples for using `tab_model()` to create HTML tables for mixed models. Basically, `tab_model()` behaves in a very similar way for mixed models as for other, simple regression models, as shown [in this vignette](tab_model_estimates.html). ```{r, results='hide', message=FALSE, warning=FALSE} # load required packages library(sjPlot) library(lme4) data("sleepstudy") data("efc") efc$cluster <- as.factor(efc$e15relat) ``` ## Mixed models summaries as HTML table Unlike tables for [non-mixed models](tab_model_estimates.html), `tab_models()` adds additional information on the random effects to the table output for mixed models. You can hide these information with `show.icc = FALSE` and `show.re.var = FALSE`. Furthermore, the R-squared values are marginal and conditional R-squared statistics, based on _Nakagawa et al. 2017_. ```{r} m1 <- lmer(neg_c_7 ~ c160age + c161sex + e42dep + (1 | cluster), data = efc) m2 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) tab_model(m1, m2) ``` The marginal R-squared considers only the variance of the fixed effects, while the conditional R-squared takes both the fixed and random effects into account. The p-value is a simple approximation, based on the t-statistics and using the normal distribution function. A more precise p-value can be computed using `p.val = "kr"`. In this case, which only applies to linear mixed models, the computation of p-values is based on conditional F-tests with Kenward-Roger approximation for the degrees of freedom (using the using the **pbkrtest**-package). Note that here the computation is more time consuming and thus not used as default. You can also display the approximated degrees of freedom with `show.df`. ```{r} tab_model(m1, p.val = "kr", show.df = TRUE) ``` ## Generalized linear mixed models `tab_model()` can also print and combine models with different link-functions. ```{r} data("efc") efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) efc$cluster <- as.factor(efc$e15relat) m3 <- glmer( neg_c_7d ~ c160age + c161sex + e42dep + (1 | cluster), data = efc, family = binomial(link = "logit") ) tab_model(m1, m3) ``` ## More complex models Finally, an example from the **glmmTMB**-package to show how easy it is to print zero-inflated generalized linear mixed models as HTML table. 
```{r} library(glmmTMB) data("Salamanders") m4 <- glmmTMB( count ~ spp + mined + (1 | site), ziformula = ~ spp + mined, family = truncated_poisson(link = "log"), data = Salamanders ) tab_model(m1, m3, m4, show.ci = FALSE) ``` ## References Nakagawa S, Johnson P, Schielzeth H (2017) _The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisted and expanded._ J. R. Soc. Interface 14. doi: 10.1098/rsif.2017.0213 sjPlot/vignettes/plot_interactions.Rmd0000644000176200001440000001321414147735034017752 0ustar liggesusers--- title: "Plotting Interaction Effects of Regression Models" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plotting Interaction Effects of Regression Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r set-options, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 3.5, message = FALSE, warning = FALSE) options(width = 800, tibble.width = Inf) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This document describes how to plot marginal effects of interaction terms from various regression models, using the `plot_model()` function. `plot_model()` is a generic plot-function, which accepts many model-objects, like `lm`, `glm`, `lme`, `lmerMod` etc. `plot_model()` allows to create various plot tyes, which can be defined via the `type`-argument. The default is `type = "fe"`, which means that fixed effects (model coefficients) are plotted. To plot marginal effects of interaction terms, call `plot_model()` with: * `type = "pred"` to plot predicted values (marginal effects) for specific model terms, including interaction terms. * `type = "eff"`, which is similar to `type = "pred"`, however, discrete predictors are held constant at their proportions (not reference level). It internally calls \code{\link[effects]{Effect}} via \code{\link[ggeffects]{ggeffect}}. * `type = "emm"`, which is similar to `type = "eff"`. It internally calls \code{\link[emmeans]{emmeans}} via \code{\link[ggeffects]{ggemmeans}}. * `type = "int"` to plot marginal effects of interaction terms in a more convenient way. `plot_model()` supports [labelled data](https://cran.r-project.org/package=sjlabelled) and automatically uses variable and value labels to annotate the plot. This works with most regression modelling functions. ***Note:** For marginal effects plots, **sjPlot** calls functions from the [**ggeffects-package**](https://strengejacke.github.io/ggeffects/). If you need more flexibility when creating marginal effects plots, consider directly using the **ggeffects**-package.* # Two-Way-Interactions _Note: To better understand the principle of plotting interaction terms, it might be helpful to read the vignette on [marginal effects](plot_marginal_effects.html) first._ To plot marginal effects of interaction terms, at least two model terms need to be specified (the terms that define the interaction) in the `terms`-argument, for which the effects are computed. To plot marginal effects for three-way-interactions, all three terms need to be specified in `terms`. A convenient way to automatically plot interactions is `type = "int"`, which scans the model formula for interaction terms and then uses these as `terms`-argument. 
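Before the worked example, here is the general call pattern as a minimal sketch (not evaluated; `fit`, `x1` and `x2` are placeholders for your own fitted model and the two interacting terms):

```{r, eval=FALSE}
# plot the interaction by specifying both interacting terms manually ...
plot_model(fit, type = "pred", terms = c("x1", "x2"))

# ... or let plot_model() pick the interaction terms from the model formula
plot_model(fit, type = "int")
```

A fully worked example with real data follows.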
```{r} library(sjPlot) library(sjmisc) library(ggplot2) data(efc) theme_set(theme_sjplot()) # make categorical efc$c161sex <- to_factor(efc$c161sex) # fit model with interaction fit <- lm(neg_c_7 ~ c12hour + barthtot * c161sex, data = efc) plot_model(fit, type = "pred", terms = c("barthtot", "c161sex")) ``` For `type = "int"`, no terms need to be specified. Note that this plot type automatically uses the first interaction term in the formula for the x-axis, while the second term is used as grouping factor. Furthermore, if continuous variables are used as second term, you can specify preset-values for this term with the `mdrt.values`-argument, which are then used as grouping levels. In this example, the second term is a factor with two levels (male/female), so there is no need for choosing specific values for the moderator. ```{r} plot_model(fit, type = "int") ``` To switch the terms, in this example _barthtot_ and _c161sex_, simply switch the order of these terms on the `terms`-argument and use `type = "pred"`. ```{r} plot_model(fit, type = "pred", terms = c("c161sex", "barthtot [0, 100]")) ``` To switch the terms for plot-type `type = "int"`, you need to re-fit the model and change the formula accordingly, i.e. using _c161sex_ as first term in the interaction. ```{r} # fit model with interaction, switching terms in formula fit <- lm(neg_c_7 ~ c12hour + c161sex * barthtot, data = efc) plot_model(fit, type = "int") ``` By default, for continuous variables, the minimum and maximum values are chosen as grouping levels, which are 0 and 100 - that's why the previous two plots are identical. You have other options as well, e.g. the mean-value and +/- 1 standard deviation (as suggested by Cohen and Cohen for continuous variables and popularized by Aiken and West 1991), which can be specified using `mdrt.values`. ```{r} plot_model(fit, type = "int", mdrt.values = "meansd") ``` # Three-Way-Interactions Since the `terms`-argument accepts up to three model terms, you can also compute marginal effects for a 3-way-interaction. ```{r} # fit model with 3-way-interaction fit <- lm(neg_c_7 ~ c12hour * barthtot * c161sex, data = efc) # select only levels 30, 50 and 70 from continuous variable Barthel-Index plot_model(fit, type = "pred", terms = c("c12hour", "barthtot [30,50,70]", "c161sex")) ``` Again, `type = "int"` will automatically plot the interaction terms, however, using `mdrt.values = "minmax"` as default - in this case, the "levels" 0 and 100 from continuous variable _barthtot_ are chosen by default. ```{r} plot_model(fit, type = "int") ``` # References Aiken and West (1991). 
_Multiple Regression: Testing and Interpreting Interactions._ sjPlot/vignettes/tab_model_estimates.Rmd0000644000176200001440000002606714073077247020233 0ustar liggesusers--- title: "Summary of Regression Models as HTML Table" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Summary of Regression Models as HTML Table} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE) if (!requireNamespace("sjlabelled", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("lme4", quietly = TRUE) || !requireNamespace("pscl", quietly = TRUE) || !requireNamespace("glmmTMB", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) } ``` `tab_model()` is the pendant to `plot_model()`, however, instead of creating plots, `tab_model()` creates HTML-tables that will be displayed either in your IDE's viewer-pane, in a web browser or in a knitr-markdown-document (like this vignette). HTML is the only output-format, you can't (directly) create a LaTex or PDF output from `tab_model()` and related table-functions. However, it is possible to easily export the tables into Microsoft Word or Libre Office Writer. This vignette shows how to create table from regression models with `tab_model()`. There's a dedicated vignette that demonstrate how to change the [table layout and appearance with CSS](table_css.html). **Note!** Due to the custom CSS, the layout of the table inside a knitr-document differs from the output in the viewer-pane and web browser! ```{r} # load package library(sjPlot) library(sjmisc) library(sjlabelled) # sample data data("efc") efc <- as_factor(efc, c161sex, c172code) ``` ## A simple HTML table from regression results First, we fit two linear models to demonstrate the `tab_model()`-function. ```{r, results='hide'} m1 <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) m2 <- lm(neg_c_7 ~ c160age + c12hour + c161sex + e17age, data = efc) ``` The simplest way of producing the table output is by passing the fitted model as parameter. By default, estimates, confidence intervals (_CI_) and p-values (_p_) are reported. As summary, the numbers of observations as well as the R-squared values are shown. ```{r} tab_model(m1) ``` ## Automatic labelling As the **sjPlot**-packages features [labelled data](https://strengejacke.github.io/sjlabelled/), the coefficients in the table are already labelled in this example. The name of the dependent variable(s) is used as main column header for each model. For non-labelled data, the coefficient names are shown. ```{r} data(mtcars) m.mtcars <- lm(mpg ~ cyl + hp + wt, data = mtcars) tab_model(m.mtcars) ``` If factors are involved and `auto.label = TRUE`, "pretty" parameters names are used (see [`format_parameters()`](https://easystats.github.io/parameters/reference/format_parameters.html). ```{r} set.seed(2) dat <- data.frame( y = runif(100, 0, 100), drug = as.factor(sample(c("nonsense", "useful", "placebo"), 100, TRUE)), group = as.factor(sample(c("control", "treatment"), 100, TRUE)) ) pretty_names <- lm(y ~ drug * group, data = dat) tab_model(pretty_names) ``` ### Turn off automatic labelling To turn off automatic labelling, use `auto.label = FALSE`, or provide an empty character vector for `pred.labels` and `dv.labels`. 
```{r} tab_model(m1, auto.label = FALSE) ``` Same for models with non-labelled data and factors. ```{r} tab_model(pretty_names, auto.label = FALSE) ``` ## More than one model `tab_model()` can print multiple models at once, which are then printed side-by-side. Identical coefficients are matched in a row. ```{r} tab_model(m1, m2) ``` ## Generalized linear models For generalized linear models, the output is slightly adapted. Instead of _Estimates_, the column is named _Odds Ratios_, _Incidence Rate Ratios_ etc., depending on the model. The coefficients are in this case automatically converted (exponentiated). Furthermore, pseudo R-squared statistics are shown in the summary. ```{r} m3 <- glm( tot_sc_e ~ c160age + c12hour + c161sex + c172code, data = efc, family = poisson(link = "log") ) efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) m4 <- glm( neg_c_7d ~ c161sex + barthtot + c172code, data = efc, family = binomial(link = "logit") ) tab_model(m3, m4) ``` ### Untransformed estimates on the linear scale To show the estimates on the linear scale, use `transform = NULL`. ```{r} tab_model(m3, m4, transform = NULL, auto.label = FALSE) ``` ## More complex models Other models, like hurdle- or zero-inflated models, also work with `tab_model()`. In this case, the zero-inflation model is indicated in the table. Use `show.zeroinf = FALSE` to hide this part from the table. ```{r} library(pscl) data("bioChemists") m5 <- zeroinfl(art ~ fem + mar + kid5 + ment | kid5 + phd + ment, data = bioChemists) tab_model(m5) ``` You can combine models of any type in one table. ```{r} tab_model(m1, m3, m5, auto.label = FALSE, show.ci = FALSE) ``` ## Show or hide further columns `tab_model()` has several arguments that allow you to show or hide specific columns in the output: * `show.est` to show/hide the column with model estimates. * `show.ci` to show/hide the column with confidence intervals. * `show.se` to show/hide the column with standard errors. * `show.std` to show/hide the column with standardized estimates (and their standard errors). * `show.p` to show/hide the column with p-values. * `show.stat` to show/hide the column with the coefficients' test statistics. * `show.df` for linear mixed models, when p-values are based on degrees of freedom with Kenward-Roger approximation, these degrees of freedom are shown. ### Adding columns In the following example, standard errors, standardized coefficients and test statistics are also shown. ```{r} tab_model(m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE) ``` ### Removing columns In the following example, default columns are removed. ```{r} tab_model(m3, m4, show.ci = FALSE, show.p = FALSE, auto.label = FALSE) ``` ### Removing and sorting columns Another way to remove columns, which also allows you to reorder the columns, is the `col.order`-argument. This is a character vector, where each element indicates a column in the output. The value `"est"`, for instance, indicates the estimates, while `"std.est"` is the column for standardized estimates and so on. By default, `col.order` contains all possible columns. All columns that should be shown (see previous tables, for example using `show.se = TRUE` to show standard errors, or `show.std = TRUE` to show standardized estimates) are then printed by default. Columns that are _excluded_ from `col.order` are _not shown_, no matter if the `show*`-arguments are `TRUE` or `FALSE`. So if `show.se = TRUE`, but `col.order` does not contain the element `"se"`, standard errors are not shown.
On the other hand, if `show.est = FALSE`, but `col.order` _does include_ the element `"est"`, the columns with estimates are not shown. In summary, `col.order` can be used to _exclude_ columns from the table and to change the order of colums. ```{r} tab_model( m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE, col.order = c("p", "stat", "est", "std.se", "se", "std.est") ) ``` ### Collapsing columns With `collapse.ci` and `collapse.se`, the columns for confidence intervals and standard errors can be collapsed into one column together with the estimates. Sometimes this table layout is required. ```{r} tab_model(m1, collapse.ci = TRUE) ``` ## Defining own labels There are different options to change the labels of the column headers or coefficients, e.g. with: * `pred.labels` to change the names of the coefficients in the _Predictors_ column. Note that the length of `pred.labels` must exactly match the amount of predictors in the _Predictor_ column. * `dv.labels` to change the names of the model columns, which are labelled with the variable labels / names from the dependent variables. * Further more, there are various `string.*`-arguments, to change the name of column headings. ```{r} tab_model( m1, m2, pred.labels = c("Intercept", "Age (Carer)", "Hours per Week", "Gender (Carer)", "Education: middle (Carer)", "Education: high (Carer)", "Age (Older Person)"), dv.labels = c("First Model", "M2"), string.pred = "Coeffcient", string.ci = "Conf. Int (95%)", string.p = "P-Value" ) ``` ## Including reference level of categorical predictors By default, for categorical predictors, the variable names and the categories for regression coefficients are shown in the table output. ```{r} library(glmmTMB) data("Salamanders") model <- glm( count ~ spp + Wtemp + mined + cover, family = poisson(), data = Salamanders ) tab_model(model) ``` You can include the reference level for categorical predictors by setting `show.reflvl = TRUE`. ```{r} tab_model(model, show.reflvl = TRUE) ``` To show variable names, categories and include the reference level, also set `prefix.labels = "varname"`. ```{r} tab_model(model, show.reflvl = TRUE, prefix.labels = "varname") ``` ## Style of p-values You can change the style of how p-values are displayed with the argument `p.style`. With `p.style = "stars"`, the p-values are indicated as `*` in the table. ```{r} tab_model(m1, m2, p.style = "stars") ``` Another option would be scientific notation, using `p.style = "scientific"`, which also can be combined with `digits.p`. ```{r} tab_model(m1, m2, p.style = "scientific", digits.p = 2) ``` ### Automatic matching for named vectors Another way to easily assign labels are _named vectors_. In this case, it doesn't matter if `pred.labels` has more labels than coefficients in the model(s), or in which order the labels are passed to `tab_model()`. The only requirement is that the labels' names equal the coefficients names as they appear in the `summary()`-output. 
```{r} # example, coefficients are "c161sex2" or "c172code3" summary(m1) pl <- c( `(Intercept)` = "Intercept", e17age = "Age (Older Person)", c160age = "Age (Carer)", c12hour = "Hours per Week", barthtot = "Barthel-Index", c161sex2 = "Gender (Carer)", c172code2 = "Education: middle (Carer)", c172code3 = "Education: high (Carer)", a_non_used_label = "We don't care" ) tab_model( m1, m2, m3, m4, pred.labels = pl, dv.labels = c("Model1", "Model2", "Model3", "Model4"), show.ci = FALSE, show.p = FALSE, transform = NULL ) ``` ## Keep or remove coefficients from the table Using the `terms`- or `rm.terms`-argument allows us to explicitly show or remove specific coefficients from the table output. ```{r} tab_model(m1, terms = c("c160age", "c12hour")) ``` Note that the names of terms to keep or remove should match the coefficients names. For categorical predictors, one example would be: ```{r} tab_model(m1, rm.terms = c("c172code2", "c161sex2")) ``` sjPlot/vignettes/custplot.Rmd0000644000176200001440000001514414147735034016073 0ustar liggesusers--- title: "Customize Plot Appearance" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Customize Plot Appearance} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This vignette shows how the plots created by the `sjp.*` and `plot_model()` functions of the **sjPlot** package can be customized. The examples refer to `plot_grpfrq()`, but most arguments are similar across all plotting function of the **sjPlot** package. ## Tweaking plot appearance The base function to globally change theme option for all sjp-function is `set_theme()`. Except for geom-colors and geom-sizes, all theme-options can be set via this function. This new theme will be applied to all following plots created with the **sjPlot** package. There are various arguments to change colors, sizes, angles etc. of labels. Following example show changes to colors, sizes, angles, geom-outlines and theme. ```{r} # load libraries library(sjPlot) # for plotting library(sjmisc) # for sample data library(ggplot2) # to access ggplot-themes # load sample data set data(efc) set_theme( geom.outline.color = "antiquewhite4", geom.outline.size = 1, geom.label.size = 2, geom.label.color = "grey50", title.color = "red", title.size = 1.5, axis.angle.x = 45, axis.textcolor = "blue", base = theme_bw() ) plot_grpfrq( efc$e42dep, efc$e16sex, title = NULL, geom.colors = c("cadetblue", "coral"), geom.size = 0.4 ) ``` ## Using the Color Brewer palettes All plotting functions support the usage of the [Colorbrewer]( https://colorbrewer2.org/) palettes. To apply a color brewer palette, use specify the palette as `geom.colors`. Any valid color brewer palette is recognized automatically. ```{r} # blank theme set_theme( base = theme_blank(), axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) plot_grpfrq( efc$e42dep, efc$e15relat, geom.colors = "PuRd", show.values = FALSE ) ``` An overview of all supported color codes can be obtained with `display.brewer.all()` from the `RColorBrewer` package. 
```{r, eval=FALSE} library(RColorBrewer) display.brewer.all() ``` ## Plot with flipped coordinates The plot's axes can be flipped using `coord.flip = TRUE`. If needed, labels can be placed inside the bars with the `vjust` or `hjust` arguments. In such cases, you might need to adjust the label colors with `geom.label.color = "white"`. ```{r} set_theme(geom.label.color = "white", geom.label.size = 3) # labels appear very large due to export metrics plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE) ``` ## Adding plot margins Plots with no margins towards the axes may look strange to some people (not to me, though). To restore the ggplot-default behaviour, use the `expand.grid` argument: ```{r results='hide', echo=FALSE} set_theme( axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) ``` ```{r} plot_grpfrq(efc$e42dep, efc$e16sex, expand.grid = TRUE) ``` ## Theme options You can use any pre-defined theme from ggplot, like `theme_bw()`, `theme_classic()` or `theme_minimal()` as default theme. ```{r} set_theme(base = theme_light()) plot_frq(efc$e42dep) ``` ## Pre-defined themes There is a set of pre-defined themes from the sjPlot-package. See `?"sjPlot-themes"` for a complete list. ```{r} library(sjmisc) data(efc) efc <- to_factor(efc, e42dep, c172code) m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc) # reset theme set_theme(base = theme_grey()) # forest plot of regression model p <- plot_model(m) # default theme p # pre-defined theme p + theme_sjplot() ``` ## Pre-defined scales There is also a new scale for **ggplot**-objects, `scale_color_sjplot()` and `scale_fill_sjplot()`. ```{r} p + theme_sjplot2() + scale_color_sjplot("simply") ``` To see all currently available color sets, use `show_sjplot_pals()`. ```{r} show_sjplot_pals() ``` ## Set up own themes based on existing themes If you want to use a specific theme as base for building your own theme, use the `base` argument. When using `base` instead of `theme`, further arguments for settings colors etc. are not ignored. ```{r} set_theme(base = theme_bw(), axis.linecolor = "darkgreen") plot_frq(efc$e42dep) ``` ## Further customization options Each plotting function invisibly returns the ggplot-object. 
You can further add options to customize the appearance of the plot, like in the following example, where the count axis is hidden (color set to white): ```{r} set_theme( base = theme_classic(), axis.tickslen = 0, # hides tick marks axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3.5 ) plot_grpfrq( efc$e42dep, efc$e16sex, coord.flip = TRUE, show.axis.values = FALSE ) + theme(axis.line.x = element_line(color = "white")) ``` ## Plot legend The plot's legend can be customized via various `legend.`-arguments, see following examples: ```{r} set_theme( base = theme_classic(), legend.title.face = "italic", # title font face legend.inside = TRUE, # legend inside plot legend.color = "grey50", # legend label color legend.pos = "bottom right", # legend position inside plot axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE) ``` ```{r} set_theme( base = theme_classic(), axis.linecolor = "white", # "remove" axis lines axis.textcolor.y = "darkred", # set axis label text only for y axis axis.tickslen = 0, # "remove" tick marks legend.title.color = "red", # legend title color legend.title.size = 2, # legend title size legend.color = "green", # legend label color legend.pos = "top", # legend position above plot axis.title.size = .9, axis.textsize = .9, legend.size = .7, geom.label.size = 3 ) plot_grpfrq(efc$e42dep, efc$e16sex) ``` sjPlot/vignettes/blackwhitefigures.Rmd0000644000176200001440000000470414147735034017720 0ustar liggesusers--- title: "Black & White Figures for Print Journals" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Black & White Figures for Print Journals} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This document shows examples how to create b/w figures, e.g. if you don't want colored figures for print-journals. ## Barplots in grey-scaled colors There are two ways to create plots in black and white or greyscale. For bar plots, `geom.colors = "gs"` creates a plot using a greyscale (based on `scales::grey_pal()`). ```{r} library(sjPlot) library(sjmisc) library(sjlabelled) library(ggplot2) theme_set(theme_bw()) data(efc) plot_grpfrq(efc$e42dep, efc$c172code, geom.colors = "gs") ``` ## Lineplots in b/w with different linetypes Similar to barplots, lineplots - mostly from `plot_model()` - can be plotted in greyscale as well (with `colors = "gs"`). However, in most cases lines colored in greyscale are difficult to distinguish. In this case, `plot_model()` supports black & white figures with different linetypes. Use `colors = "bw"` to create a b/w-plot. 
```{r} # create binrary response y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1) # create data frame for fitting model df <- data.frame( y = to_factor(y), sex = to_factor(efc$c161sex), dep = to_factor(efc$e42dep), barthel = efc$barthtot, education = to_factor(efc$c172code) ) # set variable label for response set_label(df$y) <- "High Negative Impact" # fit model fit <- glm(y ~., data = df, family = binomial(link = "logit")) # plot marginal effects plot_model( fit, type = "pred", terms = c("barthel", "sex","dep"), colors = "bw", ci.lvl = NA ) ``` Different linetypes do not apply to all linetyped plots, if these usually only plot a single line - so there's no need for different linetypes, and you can just set `colors = "black"` (or `colors = "bw"`). ```{r} # plot coefficients plot_model(fit, colors = "black") ``` sjPlot/vignettes/plot_model_estimates.Rmd0000644000176200001440000002127614073077247020440 0ustar liggesusers--- title: "Plotting Estimates (Fixed Effects) of Regression Models" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plotting Estimates (Fixed Effects) of Regression Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE) if (!requireNamespace("sjlabelled", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) } ``` This document describes how to plot estimates as forest plots (or dot whisker plots) of various regression models, using the `plot_model()` function. `plot_model()` is a generic plot-function, which accepts many model-objects, like `lm`, `glm`, `lme`, `lmerMod` etc. `plot_model()` allows to create various plot tyes, which can be defined via the `type`-argument. The default is `type = "fe"`, which means that fixed effects (model coefficients) are plotted. For mixed effects models, only fixed effects are plotted by default as well. ```{r results='hide'} library(sjPlot) library(sjlabelled) library(sjmisc) library(ggplot2) data(efc) theme_set(theme_sjplot()) ``` ## Fitting a logistic regression model First, we fit a model that will be used in the following examples. The examples work in the same way for any other model as well. ```{r results='hide'} # create binary response y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1) # create data frame for fitting model df <- data.frame( y = to_factor(y), sex = to_factor(efc$c161sex), dep = to_factor(efc$e42dep), barthel = efc$barthtot, education = to_factor(efc$c172code) ) # set variable label for response set_label(df$y) <- "High Negative Impact" # fit model m1 <- glm(y ~., data = df, family = binomial(link = "logit")) ``` ## Plotting estimates of generalized linear models The simplest function call is just passing the model object as argument. By default, estimates are sorted in descending order, with the highest effect at the top. ```{r} plot_model(m1) ``` The "neutral" line, i.e. the vertical intercept that indicates no effect (x-axis position 1 for most glm's and position 0 for most linear models), is drawn slightly thicker than the other grid lines. You can change the line color with the `vline.color`-argument. 
```{r} plot_model(m1, vline.color = "red") ``` ## Sorting estimates By default, the estimates are sorted in the same order as they were introduced into the model. Use `sort.est = TRUE` to sort estimates in descending order, from highest to lowest value. ```{r} plot_model(m1, sort.est = TRUE) ``` Another way to sort estimates is to use the `order.terms`-argument. This is a numeric vector, indicating the order of estimates in the plot. In the summary, we see that "sex2" is the first term, followed by the three dependency-categories (position 2-4), the Barthel-Index (5) and two levels for intermediate and high level of education (6 and 7). ```{r} summary(m1) ``` Now we want the educational levels (6 and 7) first, than gender (1), followed by dependency (2-4)and finally the Barthel-Index (5). Use this order as numeric vector for the `order.terms`-argument. ```{r} plot_model(m1, order.terms = c(6, 7, 1, 2, 3, 4, 5)) ``` ## Estimates on the untransformed scale By default, `plot_model()` automatically exponentiates coefficients, if appropriate (e.g. for models with log or logit link). You can explicitley prevent transformation by setting the `transform`-argument to `NULL`, or apply any transformation by using a character vector with the function name. ```{r} plot_model(m1, transform = NULL) plot_model(m1, transform = "plogis") ``` ## Showing value labels By default, just the dots and error bars are plotted. Use `show.values = TRUE` to show the value labels with the estimates values, and use `show.p = FALSE` to suppress the asterisks that indicate the significance level of the p-values. Use `value.offset` to adjust the relative positioning of value labels to the dots and lines. ```{r} plot_model(m1, show.values = TRUE, value.offset = .3) ``` ## Labelling the plot As seen in the above examples, by default, the plotting-functions of **sjPlot** retrieve value and variable labels if the data is _labelled_, using the [sjlabelled-package](https://cran.r-project.org/package=sjlabelled). If the data is not labelled, the variable names are used. In such cases, use the arguments `title`, `axis.labels` and `axis.title` to annotate the plot title and axes. If you want variable names instead of labels, even for labelled data, use `""` as argument-value, e.g. `axis.labels = ""`, or set `auto.label` to `FALSE`. Furthermore, `plot_model()` applies case-conversion to all labels by default, using the [snakecase-package](https://cran.r-project.org/package=snakecase). This converts labels into human-readable versions. Use `case = NULL` to turn case-conversion off, or refer to the package-vignette of the **snakecase**-package for further options. ```{r} data(iris) m2 <- lm(Sepal.Length ~ Sepal.Width + Petal.Length + Species, data = iris) # variable names as labels, but made "human readable" # separating dots are removed plot_model(m2) # to use variable names even for labelled data plot_model(m1, axis.labels = "", title = "my own title") ``` ## Pick or remove specific terms from plot Use `terms` resp. `rm.terms` to select specific terms that should (not) be plotted. ```{r} # keep only coefficients sex2, dep2 and dep3 plot_model(m1, terms = c("sex2", "dep2", "dep3")) # remove coefficients sex2, dep2 and dep3 plot_model(m1, rm.terms = c("sex2", "dep2", "dep3")) ``` ## Standardized estimates For linear models, you can also plot standardized beta coefficients, using `type = "std"` or `type = "std2"`. These two options differ in the way how coefficients are standardized. 
`type = "std2"` plots standardized beta values, however, standardization follows Gelman's (2008) suggestion, rescaling the estimates by dividing them by two standard deviations instead of just one. ```{r} plot_model(m2, type = "std") ``` ## Bayesian models (fitted with Stan) `plot_model()` also supports stan-models fitted with the **rstanarm** or **brms** packages. However, there are a few differences compared to the previous plot examples. First, of course, there are no _confidence intervals_, but _uncertainty intervals_ - high density intervals, to be precise. Second, there's not just one interval range, but an _inner_ and _outer_ probability. By default, the inner probability is fixed to `.5` (50%), while the outer probability is specified via `ci.lvl` (which defaults to `.89` (89%) for Bayesian models). However, you can also use the arguments `prob.inner` and `prob.outer` to define the intervals boundaries. Third, the point estimate is by default the _median_, but can also be another value, like mean. This can be specified with the `bpe`-argument. ```{r results='hide'} if (require("rstanarm", quietly = TRUE)) { # make sure we apply a nice theme library(ggplot2) theme_set(theme_sjplot()) data(mtcars) m <- stan_glm(mpg ~ wt + am + cyl + gear, data = mtcars, chains = 1) # default model plot_model(m) # same model, with mean point estimate, dot-style for point estimate # and different inner/outer probabilities of the HDI plot_model( m, bpe = "mean", bpe.style = "dot", prob.inner = .4, prob.outer = .8 ) } ``` ## Tweaking plot appearance There are several options to customize the plot appearance: * The `colors`-argument either takes the name of a valid [colorbrewer palette](https://colorbrewer2.org/) (see also the related [vignette](custplot.html)), `"bw"` or `"gs"` for black/white or greyscaled colors, or a string with a color name. * `value.offset` and `value.size` adjust the positioning and size of value labels, if shown. * `dot.size` and `line.size` change the size of dots and error bars. * `vline.color` changes the neutral "intercept" line. * `width`, `alpha` and `scale` are passed down to certain ggplot-geoms, like `geom_errorbar()` or `geom_density_ridges()`. ```{r} plot_model( m1, colors = "Accent", show.values = TRUE, value.offset = .4, value.size = 4, dot.size = 3, line.size = 1.5, vline.color = "blue", width = 1.5 ) ``` # References Gelman A (2008) _Scaling regression inputs by dividing by two standard deviations._ Statistics in Medicine 27: 2865–2873. sjPlot/vignettes/tab_model_robust.Rmd0000644000176200001440000001364113746367064017552 0ustar liggesusers--- title: "Robust Estimation of Standard Errors, Confidence Intervals and p-values" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r message=FALSE, warning=FALSE, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sandwich", quietly = TRUE) || !requireNamespace("lme4", quietly = TRUE) || !requireNamespace("clubSandwich", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) library(dplyr) } set.seed(333) ``` The `tab_model()` function also allows the computation of standard errors, confidence intervals and p-values based on robust covariance matrix estimation from model parameters. 
Robust estimation is based on the packages **sandwich** and **clubSandwich**, so all models supported by either of these packages work with `tab_model()`. ## Classical Regression Models ### Robust Covariance Matrix Estimation from Model Parameters There are three arguments that allow for choosing different methods and options of robust estimation: `vcov.fun`, `vcov.type` and `vcov.args`. Let us start with a simple example, which uses a heteroskedasticity-consistent covariance matrix estimation with estimation-type "HC3" (i.e. `sandwich::vcovHC(type = "HC3")` is called): ```{r} data(iris) model <- lm(Petal.Length ~ Sepal.Length * Species + Sepal.Width, data = iris) # model parameters, where SE, CI and p-values are based on robust estimation tab_model(model, vcov.fun = "HC", show.se = TRUE) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovHC(model)))) ``` ### Cluster-Robust Covariance Matrix Estimation (sandwich) If another covariance matrix estimation is required, use the `vcov.fun`-argument. This argument needs the suffix for the related `vcov*()`-functions as value, i.e. `vcov.fun = "CL"` would call `sandwich::vcovCL()`, or `vcov.fun = "HAC"` would call `sandwich::vcovHAC()`. The specific estimation type can be changed with `vcov.type`. E.g., `sandwich::vcovCL()` accepts estimation types HC0 to HC3. In the next example, we use a clustered covariance matrix estimation with HC1-estimation type. ```{r} # change estimation-type tab_model(model, vcov.fun = "CL", vcov.type = "HC1", show.se = TRUE) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovCL(model)))) ``` Usually, clustered covariance matrix estimation is used when there is a cluster-structure in the data. The variable indicating the cluster-structure can be defined in `sandwich::vcovCL()` with the `cluster`-argument. In `tab_model()`, additional arguments that should be passed down to functions from the **sandwich** package can be specified in `vcov.args`: ```{r} iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris))) # change estimation-type, defining additional arguments tab_model( model, vcov.fun = "CL", vcov.type = "HC1", vcov.args = list(cluster = iris$cluster), show.se = TRUE ) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovCL(model, cluster = iris$cluster)))) ``` ### Cluster-Robust Covariance Matrix Estimation (clubSandwich) Cluster-robust estimation of the variance-covariance matrix can also be achieved using `clubSandwich::vcovCR()`. Thus, when `vcov.fun = "CR"`, the related function from the **clubSandwich** package is called. Note that this function _requires_ the specification of the `cluster`-argument. ```{r} # create fake-cluster-variable, to demonstrate cluster robust standard errors iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris))) # cluster-robust estimation tab_model( model, vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$cluster), show.se = TRUE ) # compare standard errors to result from clubSsandwich-package unname(sqrt(diag(clubSandwich::vcovCR(model, type = "CR1", cluster = iris$cluster)))) ``` ### Robust Covariance Matrix Estimation on Standardized Model Parameters Finally, robust estimation can be combined with standardization. However, robust covariance matrix estimation only works for `show.std = "std"`. 
```{r} # model parameters, robust estimation on standardized model tab_model( model, show.std = "std", vcov.fun = "HC" ) ``` ## Mixed Models ### Robust Covariance Matrix Estimation for Mixed Models For linear mixed models, that by definition have a clustered ("hierarchical" or multilevel) structure in the data, it is also possible to estimate a cluster-robust covariance matrix. This is possible due to the **clubSandwich** package, thus we need to define the same arguments as in the above example. ```{r} library(lme4) data(iris) set.seed(1234) iris$grp <- as.factor(sample(1:3, nrow(iris), replace = TRUE)) # fit example model model <- lme4::lmer( Sepal.Length ~ Species * Sepal.Width + Petal.Length + (1 | grp), data = iris ) # normal model parameters, like from 'summary()' tab_model(model) # model parameters, cluster robust estimation for mixed models tab_model( model, vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$grp) ) ``` ### Robust Covariance Matrix Estimation on Standardized Mixed Model Parameters Again, robust estimation can be combined with standardization for linear mixed models as well, which in such cases also only works for `show.std = "std"`. ```{r} # model parameters, cluster robust estimation on standardized mixed model tab_model( model, show.std = "std", vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$grp) ) ``` sjPlot/vignettes/table_css.Rmd0000644000176200001440000000753413662304072016154 0ustar liggesusers--- title: "Customizing HTML tables" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Customizing HTML tables} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE) ``` All `tab_*`-functions create a HTML page with the table output. This table, by default, is opened in the viewer pane of your IDE (in case you’re using an IDE that also supports the viewer pane). If a viewer pane is not available, the created HTML output is saved as temporary file and opened in your default web browser. The temporary files are deleted after your R session ends. ## Copying table output to office or word processors ### Export table as HTML file to open in word processors You can save the HTML page as file for further usage by specifying the `file`-argument The saved HTML file can be opened by word processors like LibreOffice or Microsoft Office. ### Drag and drop from browser or RStudio viewer pane You can directly drag and drop a table from the RStudio viewer pane or browser into your word processor. Simply select the complete table with your mouse and drag it into office. ## Customizing table output with the CSS parameter The table output is in in HTML format. The table style (visual appearance) is formatted using _Cascading Style Sheets_ (CSS). If you are a bit familiar with these topics, you can easily customize the appearance of the table output. Many table elements (header, row, column, cell, summary row, first row or column...) have CSS-class attributes, which can be used to change the table style. Since each `sjt.*` function as well as `tab_model()` has different table elements and thus different class attributes, you first need to know which styles can be customized. ### Retrieving customizable styles The table functions invisibly return several values. The return value `page.style` contains the style information for the HTML table. 
You can print this style sheet to console using the `cat()`-function: ```{r} library(sjPlot) data(efc) m <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) tab <- tab_model(m) ``` ```{r echo = TRUE} cat(tab$page.style) ``` The HTML code is in the `page.content` return value. The following code prints the HTML code of the table to the R console: ```{r echo = TRUE} cat(tab$page.content) ``` Now you can see which table elements are associated with which CSS class attributes. ## Customizing table output with the CSS parameter You can customize the table output with the `CSS` parameter. This parameter requires a list of attributes, which follow a certain pattern: 1) each attributes needs a `css.` prefix 2) followed by the class name (e.g. `caption`, `thead`, `centeralign`, etc.) 3) equal-sign 4) the CSS format (in (single) quotation marks) 5) the CSS format must end with a colon (;) Example: ```{r} tab_model( m, CSS = list( css.depvarhead = 'color: red;', css.centeralign = 'text-align: left;', css.firsttablecol = 'font-weight: bold;', css.summary = 'color: blue;' ) ) ``` In the above example, the header row lost the original style and just became red. If you want to keep the original style and just add additional style information, use the plus-sign (+) as initial character for the parameter attributes. In the following example, the header row keeps its original style and is additionally printed in red: ```{r} tab_model(m, CSS = list(css.depvarhead = '+color: red;')) ``` ## Pre-defined Table-Layouts There are a few pre-defined CSS-themes, which can be accessed with the `css_theme()`-function. There are more pre-defined themes planned for the future. ```{r} tab_model(m, CSS = css_theme("cells")) ``` sjPlot/vignettes/tab_bayes.Rmd0000644000176200001440000000564513612122336016143 0ustar liggesusers--- title: "Summary of Bayesian Models as HTML Table" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("insight", quietly = TRUE) || !requireNamespace("httr", quietly = TRUE) || !requireNamespace("brms", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` This vignette shows examples for using `tab_model()` to create HTML tables for mixed models. Basically, `tab_model()` behaves in a very similar way for mixed models as for other, simple regression models, as shown [in this vignette](tab_model_estimates.html). 
```{r, results='hide', message=FALSE, warning=FALSE} # load required packages library(sjPlot) library(insight) library(httr) library(brms) # load sample models # zinb <- read.csv("http://stats.idre.ucla.edu/stat/data/fish.csv") # set.seed(123) # m1 <- brm(bf( # count ~ persons + child + camper + (1 | persons), # zi ~ child + camper + (1 | persons) # ), # data = zinb, # family = zero_inflated_poisson() # ) m1 <- insight::download_model("brms_zi_2") # data(epilepsy) # set.seed(123) # epilepsy$visit <- as.numeric(epilepsy$visit) # epilepsy$Base2 <- sample(epilepsy$Base, nrow(epilepsy), replace = TRUE) # f1 <- bf(Base ~ zAge + count + (1 |ID| patient)) # f2 <- bf(Base2 ~ zAge + Trt + (1 |ID| patient)) # m2 <- brm(f1 + f2 + set_rescor(FALSE), data = epilepsy) m2 <- insight::download_model("brms_mv_3") ``` ## Bayesian models summaries as HTML table For Bayesian regression models, some of the differences to the table output from [simple models](tab_model_estimates.html) or [mixed models](tab_mixed.html) of `tab_models()` are the use of _Highest Density Intervals_ instead of confidence intervals, the Bayes-R-squared values, and a different "point estimate" (which is, by default, the median from the posterior draws). ```{r} tab_model(m1) ``` ## Multivariate response models For multivariate response models, like mediator-analysis-models, it is recommended to print just one model in the table, as each regression is displayed as own "model" in the output. ```{r} tab_model(m2) ``` ## Show two Credible Interval-column To show a second CI-column, use `show.ci50 = TRUE`. ```{r} tab_model(m2, show.ci50 = TRUE) ``` ## Mixing multivariate and univariate response models When both multivariate and univariate response models are displayed in one table, a column _Response_ is added for the multivariate response model, to indicate the different outcomes. 
```{r} tab_model(m1, m2) ``` sjPlot/vignettes/plot_likert_scales.Rmd0000644000176200001440000000414513662304072020072 0ustar liggesusers--- title: "Plotting Likert Scales" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plotting Likert Scales} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r set-options, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 6, message = FALSE, warning = FALSE) options(width = 800, tibble.width = Inf) if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("parameters", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` ```{r fig.height = 5.5} library(dplyr) library(sjPlot) library(sjmisc) library(parameters) data(efc) # find all variables from COPE-Index, which all have a "cop" in their # variable name, and then plot that subset as likert-plot mydf <- find_var(efc, pattern = "cop", out = "df") plot_likert(mydf) ``` ```{r} plot_likert( mydf, grid.range = c(1.2, 1.4), expand.grid = FALSE, values = "sum.outside", show.prc.sign = TRUE ) ``` ```{r} # Plot in groups plot_likert(mydf, groups = c(2, 1, 1, 1, 1, 2, 2, 2, 1)) ``` ```{r fig.height = 6.5} pca <- parameters::principal_components(mydf) groups <- parameters::closest_component(pca) plot_likert(mydf, groups = groups) ``` ```{r} plot_likert( mydf, c(rep("B", 4), rep("A", 5)), sort.groups = FALSE, grid.range = c(0.9, 1.1), geom.colors = "RdBu", rel_heights = c(6, 8), wrap.labels = 40, reverse.scale = TRUE ) ``` ```{r fig.height = 5} # control legend items six_cat_example = data.frame( matrix(sample(1:6, 600, replace = TRUE), ncol = 6) ) six_cat_example <- six_cat_example %>% dplyr::mutate_all( ~ ordered(., labels = c("+++", "++", "+", "-", "--", "---"))) # Old default plot_likert( six_cat_example, groups = c(1, 1, 1, 2, 2, 2), group.legend.options = list(nrow = 2, byrow = FALSE) ) # New default plot_likert(six_cat_example, groups = c(1, 1, 1, 2, 2, 2)) ``` sjPlot/vignettes/sjtitemanalysis.Rmd0000644000176200001440000001032513662304072017430 0ustar liggesusers--- title: "Item Analysis of a Scale or an Index" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Item Analysis of a Scale or an Index} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("parameters", quietly = TRUE) || !requireNamespace("psych", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` This document shows examples for using the `tab_itemscale()` function of the sjPlot package. ## Performing an item analysis of a scale or index This function performs an item analysis with certain statistics that are useful for scale or index development. 
The following statistics are computed for each variable (column) of a data frame:

* percentage of missing values
* mean value
* standard deviation
* skew
* item difficulty
* item discrimination
* Cronbach's Alpha if item was removed from scale
* mean (or average) inter-item-correlation

Optionally, the following statistics can be computed as well:

* kurtosis
* Shapiro-Wilk Normality Test

If the argument `factor.groups` is _not_ `NULL`, the data frame `df` will be split into groups, assuming that `factor.groups` indicates those columns (variables) of the data frame that belong to a certain factor (see, for instance, the return value of `tab_pca()` or `parameters::principal_components()` as an example for retrieving factor groups for a scale). This is useful when you have performed a principal component analysis or factor analysis as a first step, and now want to see whether the found factors / components represent a scale or index score.

To demonstrate this function, we first need some data:

```{r, echo=FALSE, message=FALSE, warning=FALSE}
library(sjPlot)
library(sjmisc)
library(dplyr)
data(efc)
# create data frame with COPE-index scale
mydf <- dplyr::select(efc, contains("cop"))
```

## Index score with one component

The simplest function call is just passing the data frame as an argument. In this case, the function assumes that all variables of the data frame belong to one factor only.

```{r}
tab_itemscale(mydf)
```

To interpret the output, we may consider the following values as rules of thumb for indicating a reliable scale:

* item difficulty should range between 0.2 and 0.8; the ideal value is p+(1-p)/2 (which mostly is between 0.5 and 0.8)
* for item discrimination, acceptable values are 0.2 or higher; the closer to 1, the better
* in case the total Cronbach's Alpha value is below the acceptable cut-off of 0.7 (mostly if an index has few items), the mean inter-item-correlation is an alternative measure to indicate acceptability; a satisfactory range lies between 0.2 and 0.4

## Index score with more than one component

The items of the COPE index used for our example do not represent a single factor. We can check this, for instance, with a principal component analysis. If you know which variable belongs to which factor (i.e. which variable is part of which component), you can pass a numeric vector with these group indices to the argument `factor.groups`. In this case, the data frame is divided into the components specified by `factor.groups`, and each component (or factor) is analysed.

```{r}
library(parameters)
# Compute PCA on Cope-Index, and retrieve
# factor indices for each COPE index variable
pca <- parameters::principal_components(mydf)
factor.groups <- parameters::closest_component(pca)
```

The PCA extracted two components. Now `tab_itemscale()` ...

1. performs an item analysis on both components, showing whether each of them is a reliable and useful scale or index score
2. builds an index of each component, by standardizing each scale
3. adds a component-correlation-matrix, to see whether the index scores (which are based on the components) are highly correlated or not.
```{r} tab_itemscale(mydf, factor.groups) ``` ## Adding further statistics ```{r} tab_itemscale(mydf, factor.groups, show.shapiro = TRUE, show.kurtosis = TRUE) ``` sjPlot/vignettes/plot_marginal_effects.Rmd0000644000176200001440000001701014147735034020537 0ustar liggesusers--- title: "Plotting Marginal Effects of Regression Models" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r set-options, echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, dev = "png", fig.width = 7, fig.height = 3.5, warning = FALSE, eval = TRUE # eval = if (isTRUE(exists("params"))) params$EVAL else FALSE ) options(width = 800, tibble.width = Inf) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("splines", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This document describes how to plot marginal effects of various regression models, using the `plot_model()` function. `plot_model()` is a generic plot-function, which accepts many model-objects, like `lm`, `glm`, `lme`, `lmerMod` etc. `plot_model()` allows to create various plot tyes, which can be defined via the `type`-argument. The default is `type = "fe"`, which means that fixed effects (model coefficients) are plotted. To plot marginal effects, call `plot_model()` with: * `type = "pred"` to plot predicted values (marginal effects) for specific model terms. * `type = "eff"`, which is similar to `type = "pred"`, however, discrete predictors are held constant at their proportions (not reference level). It internally calls \code{\link[effects]{Effect}} via \code{\link[ggeffects]{ggeffect}}. * `type = "emm"`, which is similar to `type = "eff"`. It internally calls \code{\link[emmeans]{emmeans}} via \code{\link[ggeffects]{ggemmeans}}. * `type = "int"` to plot marginal effects of interaction terms. To plot marginal effects of regression models, at least one model term needs to be specified for which the effects are computed. It is also possible to compute marginal effects for model terms, grouped by the levels of another model's predictor. The function also allows plotting marginal effects for two- or three-way-interactions, however, this is shown in a different vignette. `plot_model()` supports [labelled data](https://cran.r-project.org/package=sjlabelled) and automatically uses variable and value labels to annotate the plot. This works with most regression modelling functions. ***Note:** For marginal effects plots, **sjPlot** calls functions from the [**ggeffects-package**](https://strengejacke.github.io/ggeffects/). If you need more flexibility when creating marginal effects plots, consider directly using the **ggeffects**-package.* # Marginal effects `plot_model(type = "pred")` computes predicted values for all possible levels and values from a model's predictors. In the simplest case, a fitted model is passed as first argument, followed by the `type` argument and the term in question as `terms` argument: ```{r} library(sjPlot) library(ggplot2) data(efc) theme_set(theme_sjplot()) fit <- lm(barthtot ~ c12hour + neg_c_7 + c161sex + c172code, data = efc) plot_model(fit, type = "pred", terms = "c12hour") ``` The plot shows the predicted values for the response at each value from the term _c12hour_. 
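As noted above, **sjPlot** delegates the computation of these predictions to the **ggeffects**-package. A minimal sketch of the direct **ggeffects** call (assuming the model `fit` from above) looks like this:

```{r}
# compute predicted values for "c12hour" without sjPlot and plot the
# result with the plot()-method for ggeffects-objects
pred <- ggeffects::ggpredict(fit, terms = "c12hour")
plot(pred)
```

This returns the same predictions as `plot_model(fit, type = "pred", terms = "c12hour")` and can be useful when more control over the underlying data or plot is needed.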
## Marginal effects for different groups The `terms`-argument accepts up to three model terms, where the second and third term indicate grouping levels. This allows predictions for the term in question at different levels for other model terms: ```{r} plot_model(fit, type = "pred", terms = c("c12hour", "c172code")) ``` A second grouping structure can be defined, which will create a plot with multiple panels in grid layout: ```{r} plot_model(fit, type = "pred", terms = c("c12hour", "c172code", "c161sex")) ``` ## Marginal effects at specific values or levels The `terms`-argument not only defines the model terms of interest, but each model term _that defines the grouping structure_ can be limited to certain values. This allows to compute and plot marginal effects for terms at specific values only. To define these values, put them in square brackets directly after the term name: `terms = c("c12hour [30, 50, 80]", "c172code [1,3]")` ```{r} plot_model(fit, type = "pred", terms = c("c12hour [30, 50, 80]", "c172code [1,3]")) ``` Note that in the above plot, although the values 30, 50 and 80 only are selected from _c12hour_, the continuous scale automatically adds panel grids every 5 units along the x-axis. Defining own values is especially useful when variables are, for instance, log-transformed. `plot_model()` then typically only uses the range of the log-transformed variable, which is in most cases not what we want. In such situation, specify the range in the `terms`-argument. ```{r} data(mtcars) mpg_model <- lm(mpg ~ log(hp), data = mtcars) # x-values and predictions based on the log(hp)-values plot_model(mpg_model, type = "pred", terms = "hp") # x-values and predictions based on hp-values from 50 to 150 plot_model(mpg_model, type = "pred", terms = "hp [50:150]") ``` The brackets in the `terms`-argument also accept the name of a valid function, to (back-)transform predicted valued. In this example, an alternative would be to specify that values should be exponentiated, which is indicated by `[exp]` in the `terms`-argument: ```{r} # x-values and predictions based on exponentiated hp-values plot_model(mpg_model, type = "pred", terms = "hp [exp]") ``` ## Polynomial terms and splines The function also works for models with polynomial terms or splines. Following code reproduces the plot from `?splines::bs`: ```{r} library(splines) data(women) fm1 <- lm(weight ~ bs(height, df = 5), data = women) plot_model(fm1, type = "pred", terms = "height") ``` ## Different constant values for factors Model predictions are based on all possible combinations of the model terms, which are - roughly speaking - created using `expand.grid()`. For the terms in question, all values are used for combinations. All other model predictors that are _not_ specified in the `terms`-argument, are held constant (which is achieved with `sjstats::typical_value()`). By default, continuous variables are set to their mean, while factors are set to their reference level. ```{r} data(efc) efc$c172code <- sjlabelled::as_factor(efc$c172code) fit <- lm(neg_c_7 ~ c12hour + c172code, data = efc) # reference category is used for "c172code", i.e. c172code # used the first level as value for predictions plot_model(fit, type = "pred", terms = "c12hour") ``` However, one may want to set factors to their _proportions_ instead of reference level. E.g., a factor _gender_ with value 0 for female and value 1 for male persons, would be set to `0` when marginal effects are computed with `type = "pred"`. 
But if 40% of the sample are female persons, another possibility to hold this factor constant is to use the value `.4` (reflecting the proportion of 40%). If this is required, use `type = "eff"`, which internally does not call `predict()` to compute marginal effects, but rather `effects::effect()`. ```{r} # proportion is used for "c172code", i.e. it is set to # mean(sjlabelled::as_numeric(efc$c172code), na.rm = T), # which is about 1.9715 plot_model(fit, type = "eff", terms = "c12hour") ``` # Interaction terms Plotting interaction terms are described in a [separate vignette](plot_interactions.html). sjPlot/R/0000755000176200001440000000000014147735034011736 5ustar liggesuserssjPlot/R/plot_type_ranef.R0000644000176200001440000002277413627513172015266 0ustar liggesusers#' @importFrom purrr map map_df map2 #' @importFrom stats qnorm #' @importFrom dplyr if_else #' @importFrom sjmisc remove_var #' @importFrom insight find_random plot_type_ranef <- function(model, dat, ri.nr, ci.lvl, se, tf, sort.est, title, axis.labels, axis.lim, grid.breaks, show.values, value.offset, digits, facets, geom.colors, geom.size, line.size, vline.color, value.size, bpe.color, ci.style, ...) { if (inherits(model, "clmm")) { se <- FALSE ci.lvl <- NA } if (!requireNamespace("lme4", quietly = TRUE)) { stop("Package 'lme4' required for this function to work, please install it.") } if (!requireNamespace("glmmTMB", quietly = TRUE)) { stop("Package 'glmmTMB' required for this function to work, please install it.") } # get tidy output of summary ---- if (inherits(model, "glmmTMB")) rand.ef <- glmmTMB::ranef(model)[[1]] else if (inherits(model, "MixMod")) { rand.ef <- lme4::ranef(model) if (!is.list(rand.ef)) { rand.ef <- list(rand.ef) names(rand.ef) <- insight::find_random(model, flatten = TRUE) } } else rand.ef <- lme4::ranef(model) if (inherits(model, "clmm")) rand.se <- NULL else if (inherits(model, "glmmTMB")) { if (requireNamespace("TMB", quietly = TRUE)) { s1 <- TMB::sdreport(model$obj, getJointPrecision = TRUE) s2 <- sqrt(s1$diag.cov.random) rand.se <- purrr::map(rand.ef, function(.x) { cnt <- nrow(.x) * ncol(.x) s3 <- s2[1:cnt] s2 <- s2[-(1:cnt)] as.data.frame(matrix(sqrt(s3), ncol = ncol(.x), byrow = TRUE)) }) } else { se <- FALSE ci.lvl <- NA rand.se <- NULL } } else rand.se <- se_ranef(model) # get some initial values ri.cnt <- length(rand.ef) ran.names <- names(rand.ef) # set some initial values loops <- 1 p <- list() if (missing(value.size) || is.null(value.size)) value.size <- 4 # do we have a specific random intercept # specified? If yes, check valid index if (!missing(ri.nr) && !is.null(ri.nr)) { out.of.bounds <- which(ri.nr > ri.cnt) # remove out of bound indices if (length(out.of.bounds) > 0) { ri.nr <- ri.nr[-out.of.bounds] # any valid indices left? if (length(ri.nr) == 0) { stop("All indices specified in `ri.nr` were greater than amount of random intercepts in model. Please use valid range for `ri.nr`.", call. = F) } else { message("One or more indices specified in `ri.nr` were greater than amount of random intercepts in model. These indices have been removed from `ri.nr`.") } } # our looping counter contains all rand. int. indices loops <- ri.nr } else { # else, if ri.nr was NULL, plot all random intercepts, i.e. # looping counter contains all index numbers loops <- ri.nr <- seq_len(ri.cnt) } # convert to list of data frames, keep only needed random effects rand.ef <- purrr::map( loops, ~ rownames_as_column(rand.ef[[.x]]) ) # same for standard errors... 
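  # (i.e. keep only the requested random-effects blocks, coerce each block to a
  #  data frame and move its rownames into a separate column, mirroring the
  #  structure of `rand.ef` created above)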
rand.se <- purrr::map( loops, ~ rand.se[.x] %>% as.data.frame() %>% rownames_as_column() ) # if we have only one random intercept, and facet.grid # not specified, default it to false if (missing(facets)) facets <- any(purrr::map_lgl(rand.ef, ~ length(.x) > 1)) || length(ri.nr) > 1 # set default sorting. if "sort.est" is logical, set # default string value for sorting if (!is.null(sort.est)) { if (isTRUE(sort.est)) sort.est <- "sort.all" else if (is.logical(sort.est)) sort.est <- NULL } # compute ci, two-ways if (!is.null(ci.lvl) && !is.na(ci.lvl)) ci <- 1 - ((1 - ci.lvl) / 2) else ci <- NA # iterate all random effects for (lcnt in loops) { mydf.ef <- as.data.frame(rand.ef[[lcnt]]) if (!sjmisc::is_empty(rand.se)) se.fit <- rand.se[[lcnt]] grp.names <- colnames(mydf.ef) grp.names[2] <- paste(ran.names[lcnt], grp.names[2]) # use rownames, if axis.labels not available if (is.null(axis.labels)) alabels <- mydf.ef[["rowname"]] else alabels <- axis.labels # select random effects for each coefficient mydf <- purrr::map_df(2:ncol(mydf.ef), function(i) { tmp <- data_frame(estimate = mydf.ef[[i]]) if (isTRUE(se)) { tmp$conf.low = mydf.ef[[i]] - se.fit[[i]] tmp$conf.high = mydf.ef[[i]] + se.fit[[i]] } else if (!is.na(ci.lvl)) { tmp$conf.low = mydf.ef[[i]] - (stats::qnorm(ci) * se.fit[[i]]) tmp$conf.high = mydf.ef[[i]] + (stats::qnorm(ci) * se.fit[[i]]) } else { tmp$conf.low = NA tmp$conf.high = NA } if (!is.null(tf)) { # no transformation if standard errors should be reported # instead of conf. int. if (isTRUE(se)) { message("If standard errors are requested, no transformation is applied to estimates.") tf <- NULL } else { funtrans <- match.fun(tf) tmp$estimate <- funtrans(tmp$estimate) tmp$conf.low <- funtrans(tmp$conf.low) tmp$conf.high <- funtrans(tmp$conf.high) } } # set column names (variable / coefficient name) # as group indicator, and save axis labels and title in variable tmp$facet <- grp.names[i] tmp$term <- factor(alabels) tmp$title <- dplyr::if_else(facets, "Random effects", sprintf("Random effects of %s", grp.names[i])) # sort data frame, initial order reihe <- seq_len(nrow(tmp)) # sorting requested? if (!is.null(sort.est)) { # should all plots be sorted? works only # when faceting is FALSE if (sort.est == "sort.all") { if (facets) { # no sorting with facet.grids, because y-axis-labels # (group levels / labels) have to be re-sorted for # each coefficient, which is not possible with facet.grids message("Sorting each group of random effects ('sort.all') is not possible when 'facets = TRUE'.") } else { # sort odds ratios of random effects # for current coefficient reihe <- order(mydf.ef[[i]]) } } else { # else, just sort a specific coefficient # this also works with facet.grid reihe <- order(mydf.ef[[sort.est]]) } } # sort axis labels tmp$reihe <- order(reihe) # create default grouping, depending on the effect: # split positive and negative associations with outcome # into different groups treshold <- dplyr::if_else(isTRUE(tf == "exp"), 1, 0) tmp$group <- dplyr::if_else(tmp$estimate > treshold, "pos", "neg") # no p-values for random effects, # but value labels ps <- rep("", nrow(tmp)) if (show.values) ps <- sprintf("%.*f", digits, tmp$estimate) tmp$p.label <- ps tmp }) # if user doesn't want facets, split data frame at each facet-group # and call plot-function for each sub-data frame. 
we need to remove # the facet variable, else the plotting function would try to plot facets if (!facets) mydf <- purrr::map(split(mydf, f = mydf$facet), ~ sjmisc::remove_var(.x, "facet")) else mydf <- list(mydf) pl <- purrr::map2( mydf, 1:length(mydf), function(x, y) { # sort terms x$term <- factor(x$term, levels = unique(x$term[order(x$reihe)])) # now we need a named vector, in order # to match labels and term order at axis labs <- as.character(x$term) names(labs) <- labs # plot title if (sjmisc::is_empty(title)) { ptitle <- x[["title"]] } else { if (length(title) >= y) ptitle <- title[y] else ptitle <- title } # plot random effects plot_point_estimates( model = model, dat = x, tf = tf, title = ptitle, axis.labels = labs, axis.title = NULL, axis.lim = axis.lim, grid.breaks = grid.breaks, show.values = show.values, value.offset = value.offset, geom.size = geom.size, line.size = line.size, geom.colors = geom.colors, vline.color = vline.color, value.size = value.size, facets = facets, bpe.color = bpe.color, ci.style = ci.style, ... ) } ) # add plot result to final return value if (length(loops) == 1 && length(mydf) == 1) p <- pl[[1]] else { for (i in seq_len(length(pl))) p[[length(p) + 1]] <- pl[[i]] } } p } sjPlot/R/sjPlotAnova.R0000644000176200001440000002751513543605402014325 0ustar liggesusers#' @title Plot One-Way-Anova tables #' @name sjp.aov1 #' #' @description Plot One-Way-Anova table sum of squares (SS) of each factor level (group) #' against the dependent variable. The SS of the factor variable against the #' dependent variable (variance within and between groups) is printed to #' the model summary. #' #' @param var.dep Dependent variable. Will be used with following formula: #' \code{aov(var.dep ~ var.grp)} #' @param var.grp Factor with the cross-classifying variable, where \code{var.dep} #' is grouped into the categories represented by \code{var.grp}. #' @param meansums Logical, if \code{TRUE}, the values reported are the true group mean values. #' If \code{FALSE} (default), the values are reported in the standard way, i.e. the values indicate the difference of #' the group mean in relation to the intercept (reference group). #' @param string.interc Character vector that indicates the reference group (intercept), that is appended to #' the value label of the grouping variable. Default is \code{"(Intercept)"}. #' #' @inheritParams plot_grpfrq #' @inheritParams plot_xtab #' @inheritParams plot_gpt #' @inheritParams plot_model #' #' @return A ggplot-object. #' #' @examples #' data(efc) #' # note: "var.grp" does not need to be a factor. 
#' # coercion to factor is done by the function #' sjp.aov1(efc$c12hour, efc$e42dep) #' #' #' @import ggplot2 #' @importFrom sjmisc trim word_wrap to_value #' @importFrom stats confint aov summary.lm #' @importFrom rlang .data #' @importFrom sjlabelled get_label get_labels #' @export sjp.aov1 <- function(var.dep, var.grp, meansums = FALSE, title = NULL, axis.labels = NULL, rev.order = FALSE, string.interc = "(Intercept)", axis.title = "", axis.lim = NULL, geom.colors = c("#3366a0", "#aa3333"), geom.size = 3, wrap.title = 50, wrap.labels = 25, grid.breaks = NULL, show.values = TRUE, digits = 2, y.offset = .15, show.p = TRUE, show.summary = FALSE) { # -------------------------------------------------------- # get variable name # -------------------------------------------------------- var.grp.name <- get_var_name(deparse(substitute(var.grp))) var.dep.name <- get_var_name(deparse(substitute(var.dep))) # -------------------------------------------------------- # try to automatically set labels is not passed as parameter # -------------------------------------------------------- if (is.null(axis.labels)) axis.labels <- sjlabelled::get_labels(var.grp, attr.only = F, values = NULL, non.labelled = T) if (is.null(axis.title)) axis.title <- sjlabelled::get_label(var.dep, def.value = var.dep.name) if (is.null(title)) { t1 <- sjlabelled::get_label(var.grp, def.value = var.grp.name) t2 <- sjlabelled::get_label(var.dep, def.value = var.dep.name) if (!is.null(t1) && !is.null(t2)) title <- paste0(t1, " by ", t2) } # -------------------------------------------------------- # remove titles if empty # -------------------------------------------------------- if (!is.null(axis.labels) && length(axis.labels) == 1 && axis.labels == "") axis.labels <- NULL if (!is.null(axis.title) && length(axis.title) == 1 && axis.title == "") axis.title <- NULL if (!is.null(title) && length(title) == 1 && title == "") title <- NULL # -------------------------------------------------------- # unlist labels # -------------------------------------------------------- if (!is.null(axis.labels)) { # append "intercept" string, to mark the reference category axis.labels[1] <- paste(axis.labels[1], string.interc) } # -------------------------------------------------------- # Check if var.grp is factor. If not, convert to factor # -------------------------------------------------------- if (!is.factor(var.grp)) var.grp <- as.factor(var.grp) # -------------------------------------------------------- # check whether we have x-axis title. if not, use standard # value # -------------------------------------------------------- # check length of diagram title and split longer string at into new lines # every 50 chars if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title) # check length of x-axis title and split longer string at into new lines # every 50 chars if (!is.null(axis.title)) axis.title <- sjmisc::word_wrap(axis.title, wrap.title) # check length of x-axis-labels and split longer strings at into new lines # every 10 chars, so labels don't overlap if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # ---------------------------- # Calculate one-way-anova. Since we have # only one group variable, Type of SS does # not matter. 
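  # (with a single grouping factor there are no other terms to adjust for,
  #  so Type I, II and III sums of squares yield identical results)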
# ---------------------------- fit <- stats::aov(var.dep ~ var.grp) # coefficients (group mean) means <- stats::summary.lm(fit)$coefficients[, 1] # p-values of means means.p <- stats::summary.lm(fit)$coefficients[, 4] # lower confidence intervals of coefficients (group mean) means.lci <- stats::confint(fit)[, 1] # upper confidence intervals of coefficients (group mean) means.uci <- stats::confint(fit)[, 2] # ---------------------------- # Check whether true group means should be reported # or the differences of group means in relation to the # intercept (reference group). The latter is the default. # ---------------------------- if (meansums) { for (i in 2:length(means)) { means[i] <- means[i] + means[1] means.lci[i] <- means.lci[i] + means[1] means.uci[i] <- means.uci[i] + means[1] } } # ---------------------------- # create expression with model summarys. used # for plotting in the diagram later # ---------------------------- if (show.summary) { # sum of squares ss <- summary(fit)[[1]]['Sum Sq'] # multiple r2 r2 <- stats::summary.lm(fit)$r.squared # adj. r2 r2.adj <- stats::summary.lm(fit)$adj.r.squared # get F-statistics fstat <- stats::summary.lm(fit)$fstatistic[1] # p-value for F-test pval <- summary(fit)[[1]]['Pr(>F)'][1, 1] # indicate significance level by stars pan <- get_p_stars(pval) # create mathematical term modsum <- as.character(as.expression( substitute(italic(SS[B]) == ssb * "," ~~ italic(SS[W]) == ssw * "," ~~ R^2 == mr2 * "," ~~ "adj." * R^2 == ar2 * "," ~~ "F" == f * panval, list(ssb = sprintf("%.2f", ss[1, ]), ssw = sprintf("%.2f", ss[2, ]), mr2 = sprintf("%.3f", r2), ar2 = sprintf("%.3f", r2.adj), f = sprintf("%.2f", fstat), panval = pan)))) } # ---------------------------- # print coefficients and p-values in plot # ---------------------------- # init data column for p-values ps <- round(means, digits) # if no values should be shown, clear # vector now if (!show.values) ps <- rep("", length(ps)) # -------------------------------------------------------- # copy p-values into data column # -------------------------------------------------------- if (show.p) { for (i in seq_len(length(means.p))) { ps[i] <- sjmisc::trim(paste(ps[i], get_p_stars(means.p[i]))) } } # -------------------------------------------------------- # check whether order of category items should be reversed # or not # -------------------------------------------------------- if (rev.order) catorder <- length(means):1 else catorder <- seq_len(length(means)) # -------------------------------------------------------- # create new data.frame, since ggplot requires data.frame as parameter # The data frame contains means, CI and p-values # -------------------------------------------------------- df <- data_frame( means = means, # Append coefficients lower = means.lci, # append CI upper = means.uci, p = means.p, # append p-value pv = ps, xv = catorder ) # -------------------------------------------------------- # check if user defined labels have been supplied # if not, use variable names from data frame # -------------------------------------------------------- if (is.null(axis.labels)) axis.labels <- row.names(df) # order labels axis.labels <- axis.labels[catorder] df$means <- sjmisc::to_value(df$means, keep.labels = F) df$lower <- sjmisc::to_value(df$lower, keep.labels = F) df$upper <- sjmisc::to_value(df$upper, keep.labels = F) df$p <- sjmisc::to_value(df$p, keep.labels = F) df$pv <- as.character(df$pv) df$xv <- as.factor(df$xv) # bind color values to data frame, because we cannot use several # different 
color aesthetics in ggplot df <- cbind(df, geocol = ifelse(df$means >= 0, geom.colors[1], geom.colors[2])) # -------------------------------------------------------- # Calculate axis limits. The range is from lowest lower-CI # to highest upper-CI, or a user-defined range (if "axis.lim" # is not NULL) # -------------------------------------------------------- if (is.null(axis.lim)) { # we have confindence intervals displayed, so # the range corresponds to the boundaries given by # the CI's maxval <- max(df$upper) minval <- min(df$lower) if (maxval > 0) limfac <- ifelse(abs(maxval) < 5, 5, 10) else limfac <- ifelse(abs(minval) < 5, 5, 10) upper_lim <- ifelse(maxval == 0, 0, limfac * ceiling((maxval + 1) / limfac)) lower_lim <- ifelse(minval == 0, 0, limfac * floor(minval / limfac)) } else { lower_lim <- axis.lim[1] upper_lim <- axis.lim[2] } # determine gridbreaks if (is.null(grid.breaks)) ticks <- pretty(c(lower_lim, upper_lim)) else ticks <- seq(lower_lim, upper_lim, by = grid.breaks) # -------------------------------------------------------- # Set up plot padding (margins inside diagram) # -------------------------------------------------------- scaley <- scale_y_continuous( limits = c(lower_lim, upper_lim), breaks = ticks, labels = ticks ) # -------------------------------------------------------- # Start plot here! # -------------------------------------------------------- anovaplot <- ggplot(df, aes(y = .data$means, x = .data$xv)) + # print point geom_point(size = geom.size, colour = df$geocol) + # and error bar geom_errorbar(aes(ymin = .data$lower, ymax = .data$upper), colour = df$geocol, width = 0) + # Print p-values. With vertical adjustment, so # they don't overlap with the errorbars geom_text(aes(label = .data$pv, y = .data$means), nudge_x = y.offset, show.legend = FALSE) + # set y-scale-limits, breaks and tick labels scaley + # set value labels to x-axis scale_x_discrete(labels = axis.labels, limits = 1:length(axis.labels)) + # flip coordinates labs(title = title, x = NULL, y = axis.title) + coord_flip() # check whether modelsummary should be printed if (show.summary) { # add annotations with model summary # annotations include intercept-value and model's r-square anovaplot <- anovaplot + annotate("text", label = modsum, parse = TRUE, x = -Inf, y = Inf, hjust = "right", vjust = "bottom") } anovaplot } sjPlot/R/plot_model_estimates.R0000644000176200001440000002073313627513172016301 0ustar liggesusers#' @importFrom dplyr slice filter if_else #' @importFrom rlang .data #' @importFrom sjmisc remove_var #' @importFrom purrr pmap plot_model_estimates <- function(model, dat, tf, se, terms, group.terms, rm.terms, sort.est, title, axis.title, axis.labels, axis.lim, grid.breaks, show.intercept, show.values, show.p, value.offset, digits, geom.colors, geom.size, line.size, bpe.style, bpe.color, term.order, vline.color, value.size, facets, p.threshold, ci.style, ...) { # remove intercept(s) from output if (!show.intercept) { ints1 <- string_contains("(Intercept", x = dat$term) ints2 <- string_contains("b_Intercept", x = dat$term) ints3 <- string_contains("b_zi_Intercept", x = dat$term) ints4 <- which(dat$term %in% "Intercept") ints <- c(ints1, ints2, ints3, ints4) if (!sjmisc::is_empty(ints)) dat <- dplyr::slice(dat, !! -ints) } # remove non-coefficients noncoef <- string_contains("Log(theta)", x = dat$term) if (!sjmisc::is_empty(noncoef)) dat <- dplyr::slice(dat, !! 
-noncoef) # exponentiation if (!is.null(tf) && !is.stan(model)) { # no transformation if standard errors should be reported # instead of conf. int. if (isTRUE(se)) { message("If standard errors are requested, no transformation is applied to estimates.") tf <- NULL } else { funtrans <- match.fun(tf) dat[["estimate"]] <- funtrans(dat[["estimate"]]) dat[["conf.low"]] <- funtrans(dat[["conf.low"]]) dat[["conf.high"]] <- funtrans(dat[["conf.high"]]) } } # use standard error instead of ci's? if (isTRUE(se)) { dat[["conf.low"]] <- dat[["estimate"]] - dat[["std.error"]] dat[["conf.high"]] <- dat[["estimate"]] + dat[["std.error"]] } # remove further estimates terms <- parse_terms(terms) filter.remove <- dat$term %in% terms if (!is.null(terms)) dat <- dplyr::filter(dat, !! filter.remove) # or select further estimates rm.terms <- parse_terms(rm.terms) filter.remove <- !(dat$term %in% rm.terms) if (!is.null(rm.terms)) dat <- dplyr::filter(dat, !! filter.remove) # add p-asterisks to data dat$p.stars <- get_p_stars(dat$p.value, p.threshold) dat$p.label <- sprintf("%.*f", digits, dat$estimate) if (show.p) dat$p.label <- sprintf("%s %s", dat$p.label, dat$p.stars) # create default grouping, depending on the effect: # split positive and negative associations with outcome # into different groups treshold <- dplyr::if_else(isTRUE(tf == "exp"), 1, 0) dat$group <- dplyr::if_else(dat$estimate > treshold, "pos", "neg") # group estimates? if (!is.null(group.terms)) { if (length(group.terms) == nrow(dat)) { dat$group <- as.character(group.terms) } else { warning("Length of `group.terms` does not equal number of model coefficients. Ignoring this argument.", call. = F) group.terms <- NULL } } # make term name categorical, for axis labelling dat$term <- as.factor(dat$term) # does user want a specific order for terms? ordered.terms <- FALSE if (!is.null(term.order)) { if (length(term.order) == nrow(dat)) { dat$term <- factor(dat$term, levels = unique(dat$term)[rev(term.order)]) sort.est <- FALSE ordered.terms <- TRUE } else { message("Number of values in `order.terms` does not match number of terms. Terms are not sorted.") } } # sort estimates by effect size if (isTRUE(sort.est)) { if (!is.null(group.terms)) dat$term <- factor(dat$term, levels = unique(dat$term[order(dat$group)])) else dat$term <- factor(dat$term, levels = unique(dat$term[order(dat$estimate)])) } else if (!ordered.terms) { dat$term <- factor(dat$term, levels = rev(unique(dat$term))) } # set default colors. 
for grouped predictors we need more color values if (is.null(geom.colors)) geom.colors <- dplyr::if_else(is.null(group.terms), "grey30", "Set1") # for brms multilevel with multiple random intercepts, we need # special handling if (is.stan(model) && stan.has.multiranef(dat)) { # split data, create data frame for each random intercept dat <- purrr::map(split(dat, f = dat$facet), ~ sjmisc::remove_var(.x, "facet")) # random intercept names are default titles ri.titles <- names(dat) # create plots purrr::map2( dat, 1:length(dat), function(x, y) { # now we need a named vector, in order # to match labels and term order at axis labs <- as.character(x$term) names(labs) <- labs # sort terms if (!is.null(sort.est)) { reihe <- order(x$estimate) } else { reihe <- 1:nrow(x) } x$reihe <- order(reihe) x$term <- factor(x$term, levels = unique(x$term[order(x$reihe)])) # plot title if (sjmisc::is_empty(title)) { ptitle <- ri.titles[y] } else { if (length(title) >= y) ptitle <- title[y] else ptitle <- title } # plot random effects plot_point_estimates( model = model, dat = x, tf = tf, title = ptitle, axis.labels = labs, axis.title = NULL, axis.lim = axis.lim, grid.breaks = grid.breaks, show.values = show.values, value.offset = value.offset, geom.size = geom.size, line.size = line.size, geom.colors = geom.colors, vline.color = vline.color, value.size = value.size, facets = facets, bpe.style = bpe.style, bpe.color = bpe.color, ci.style = ci.style, ... ) } ) } else { if (obj_has_name(dat, "wrap.facet") && dplyr::n_distinct(dat$wrap.facet, na.rm = TRUE) > 1 && !facets) { dat <- purrr::map(split(dat, f = dat$wrap.facet), ~ sjmisc::remove_var(.x, "wrap.facet")) if (length(axis.title) == 1) axis.title <- c(axis.title, "Odds Ratios") purrr::pmap(list(dat, axis.title, names(dat)), function(.x, .y, .z) { plot_point_estimates( model = model, dat = .x, tf = tf, title = paste0(title, " (", .z, ")"), axis.labels = axis.labels, axis.title = .y, axis.lim = NULL, grid.breaks = NULL, show.values = show.values, value.offset = value.offset, geom.size = geom.size, line.size = line.size, geom.colors = geom.colors, bpe.style = bpe.style, bpe.color = bpe.color, vline.color = vline.color, value.size = value.size, facets = facets, ci.style = ci.style, ... ) }) } else { plot_point_estimates( model = model, dat = dat, tf = tf, title = title, axis.labels = axis.labels, axis.title = axis.title, axis.lim = axis.lim, grid.breaks = grid.breaks, show.values = show.values, value.offset = value.offset, geom.size = geom.size, line.size = line.size, geom.colors = geom.colors, bpe.style = bpe.style, bpe.color = bpe.color, vline.color = vline.color, value.size = value.size, facets = facets, ci.style = ci.style, ... ) } } } sjPlot/R/html_print.R0000644000176200001440000006712214136206671014247 0ustar liggesusers#' @title Print data frames as HTML table. #' @name tab_df #' #' @description These functions print data frames as HTML-table, showing #' the results in RStudio's viewer pane or in a web browser. #' #' @param x For \code{tab_df()}, a data frame; and for \code{tab_dfs()}, a #' list of data frames. #' @param title,titles,footnote,footnotes Character vector with table #' caption(s) resp. footnote(s). For \code{tab_df()}, must be a character #' of length 1; for \code{tab_dfs()}, a character vector of same length as #' \code{x} (i.e. one title or footnote per data frame). #' @param col.header Character vector with elements used as column header for #' the table. If \code{NULL}, column names from \code{x} are used as #' column header. 
#' @param encoding Character vector, indicating the charset encoding used #' for variable and value labels. Default is \code{"UTF-8"}. For Windows #' Systems, \code{encoding = "Windows-1252"} might be necessary for proper #' display of special characters. #' @param show.type Logical, if \code{TRUE}, adds information about the #' variable type to the variable column. #' @param show.rownames Logical, if \code{TRUE}, adds a column with the #' data frame's rowname to the table output. #' @param show.footnote Logical, if \code{TRUE},adds a summary footnote below #' the table. For \code{tab_df()}, specify the string in \code{footnote}, #' for \code{tab_dfs()} provide a character vector in \code{footnotes}. #' @param sort.column Numeric vector, indicating the index of the column #' that should sorted. by default, the column is sorted in ascending order. #' Use negative index for descending order, for instance, #' \code{sort.column = -3} would sort the third column in descending order. #' Note that the first column with rownames is not counted. #' @param alternate.rows Logical, if \code{TRUE}, rows are printed in #' alternatig colors (white and light grey by default). #' @param digits Numeric, amount of digits after decimal point when rounding #' values. #' @param ... Currently not used. #' #' @inheritParams tab_model #' #' @return A list with following items: #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the HTML content of the data frame (\code{page.content}), #' \item the complete HTML page, including header, style sheet and body (\code{page.complete}) #' \item the HTML table with inline-css for use with knitr (\code{knitr}) #' \item the file path, if the HTML page should be saved to disk (\code{file}) #' } #' #' @note The HTML tables can either be saved as file and manually opened #' (use argument \code{file}) or they can be saved as temporary files and #' will be displayed in the RStudio Viewer pane (if working with RStudio) #' or opened with the default web browser. Displaying resp. opening a #' temporary file is the default behaviour. #' #' @details \strong{How do I use \code{CSS}-argument?} #' \cr \cr #' With the \code{CSS}-argument, the visual appearance of the tables #' can be modified. To get an overview of all style-sheet-classnames #' that are used in this function, see return value \code{page.style} for #' details. Arguments for this list have following syntax: #' \enumerate{ #' \item the class-name as argument name and #' \item each style-definition must end with a semicolon #' } #' You can add style information to the default styles by using a + #' (plus-sign) as initial character for the argument attributes. #' Examples: #' \itemize{ #' \item \code{table = 'border:2px solid red;'} for a solid 2-pixel table border in red. #' \item \code{summary = 'font-weight:bold;'} for a bold fontweight in the summary row. #' \item \code{lasttablerow = 'border-bottom: 1px dotted blue;'} for a blue dotted border of the last table row. #' \item \code{colnames = '+color:green'} to add green color formatting to column names. #' \item \code{arc = 'color:blue;'} for a blue text color each 2nd row. #' \item \code{caption = '+color:red;'} to add red font-color to the default table caption style. #' } #' See further examples in \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}. 
#' #' @examples #' \dontrun{ #' data(iris) #' data(mtcars) #' tab_df(iris[1:5, ]) #' tab_dfs(list(iris[1:5, ], mtcars[1:5, 1:5])) #' #' # sort 2nd column ascending #' tab_df(iris[1:5, ], sort.column = 2) #' #' # sort 2nd column descending #' tab_df(iris[1:5, ], sort.column = -2)} #' #' @importFrom sjmisc var_type is_even is_float #' @importFrom purrr flatten_chr map #' @export tab_df <- function(x, title = NULL, footnote = NULL, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = FALSE, sort.column = NULL, digits = 2, encoding = "UTF-8", CSS = NULL, file = NULL, use.viewer = TRUE, ...) { # make sure list elements in CSS argument have proper name attribute CSS <- check_css_param(CSS) # get style definition style <- tab_df_style(CSS = CSS, ...) rnames <- row.names(x) x <- as.data.frame(lapply(x, function(.i) { if (is.numeric(.i) && sjmisc::is_float(.i)) sprintf("%.*f", digits, .i) else .i })) # get HTML content page.content <- tab_df_content( mydf = x, title = title, footnote = footnote, col.header = col.header, show.type = show.type, show.rownames = show.rownames, show.footnote = show.footnote, altr.row.col = alternate.rows, sort.column = sort.column, include.table.tag = TRUE, rnames = rnames, ... ) # create HTML page with header information page.complete <- tab_create_page( style = style, content = page.content, encoding = encoding ) # replace CSS-style to inline, needed for knitr documents knitr <- tab_df_knitr(CSS = CSS, content = page.content, ...) # remove spaces knitr <- rmspc(knitr) page.content <- rmspc(page.content) structure( class = c("sjTable"), list( page.style = style, page.content = page.content, page.complete = page.complete, knitr = knitr, file = file, viewer = use.viewer ) ) } #' @importFrom sjmisc var_type is_even #' @importFrom purrr flatten_chr pmap #' @rdname tab_df #' @export tab_dfs <- function(x, titles = NULL, footnotes = NULL, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = FALSE, sort.column = NULL, digits = 2, encoding = "UTF-8", CSS = NULL, file = NULL, use.viewer = TRUE, ...) { # make sure list elements in CSS argument have proper name attribute CSS <- check_css_param(CSS) # get style definition style <- tab_df_style(CSS = CSS, ...) # check arguments if (is.null(titles)) titles <- rep("", length(x)) if (is.null(footnotes)) footnotes <- rep("", length(x)) if (length(titles) != length(x)) stop("Number of elements in `title` does not match number of data frames to print.", call. = F) if (length(footnotes) != length(x)) stop("Number of elements in `footnote` does not match number of data frames to print.", call. = F) # get HTML content page.content <- paste( purrr::flatten_chr(purrr::pmap(list(x, titles, footnotes), function(dat, title, footnote) { dat[] <- lapply(dat, function(.i) { if (is.numeric(.i) && sjmisc::is_float(.i)) sprintf("%.*f", digits, .i) else .i }) tab_df_content( mydf = dat, title = title, footnote = footnote, col.header = col.header, show.type = show.type, show.rownames = show.rownames, show.footnote = show.footnote, altr.row.col = alternate.rows, sort.column = sort.column, include.table.tag = TRUE, ... ) })), collapse = "
<p>&nbsp;</p>
" ) # create HTML page with header information page.complete <- tab_create_page( style = style, content = page.content, encoding = encoding ) # replace CSS-style to inline, needed for knitr documents knitr <- tab_df_knitr(CSS = CSS, content = page.content, ...) # remove spaces knitr <- rmspc(knitr) page.content <- rmspc(page.content) structure( class = c("sjTable"), list( page.style = style, page.content = page.content, page.complete = page.complete, knitr = knitr, file = file, viewer = use.viewer ) ) } # this function is used from tab_model() #' @importFrom dplyr slice full_join #' @importFrom sjmisc replace_na #' @importFrom purrr map map_dbl tab_model_df <- function(x, zeroinf, is.zeroinf, dv.labels, rsq.list, n_obs.list, icc.list, dev.list, aic.list, aicc.list, variance.list, ngrps.list, loglik.list, n.models, title = NULL, footnote = NULL, col.header = NULL, show.re.var = FALSE, show.icc = FALSE, digits.rsq = 3, digits.re = 2, encoding = "UTF-8", CSS = NULL, file = NULL, use.viewer = TRUE, ...) { # make sure list elements in CSS argument have proper name attribute CSS <- check_css_param(CSS) # get style definition style <- tab_df_style(CSS = CSS, ...) # check for monotonic effects sp <- string_starts_with("simo_mo", x$term) if (!sjmisc::is_empty(sp)) { x.sp <- dplyr::slice(x, !! sp) x <- dplyr::slice(x, -!! sp) x$term <- gsub(pattern = "^bsp_mo", replacement = "", x = x$term) } else x.sp <- NULL # get HTML content page.content <- tab_df_content( mydf = x, title = NULL, footnote = NULL, col.header = col.header, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, show.header = TRUE, altr.row.col = FALSE, sort.column = NULL, include.table.tag = FALSE, no.last.table.row = TRUE, zeroinf = is.zeroinf, ... ) # do we have labels for dependent variables / models? empty_dv <- is.null(dv.labels) | (length(dv.labels) == 1 && dv.labels == "") # replace CSS for first table row if (!empty_dv) { page.content <- gsub( pattern = "thead ", replacement = "depvarhead ", x = page.content, fixed = TRUE, useBytes = TRUE ) } else { page.content <- gsub( pattern = "thead ", replacement = "depvarheadnodv ", x = page.content, fixed = TRUE, useBytes = TRUE ) } # replace HTML-Tag for first table row page.content <- gsub( pattern = "\n" dv.content <- paste0(dv.content, "  \n") for (i in 1:length(dv.labels)) { colspan <- length(string_ends_with(sprintf("_%i", i), x = colnames(x))) dv.content <- paste0( dv.content, sprintf(" %s\n", colspan, dv.labels[i]) ) } dv.content <- paste0(dv.content, " \n") page.content <- paste0(dv.content, page.content) } # simplex parameters here ---- if (!is.null(x.sp)) { x.sp$term <- gsub( pattern = "^simo_mo(.*)(\\.)(.*)(\\.)", replacement = "\\1 \\[\\3\\]", x = x.sp$term ) sp.content <- tab_df_content( mydf = x.sp, title = NULL, footnote = NULL, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, show.header = FALSE, altr.row.col = FALSE, sort.column = NULL, include.table.tag = FALSE, no.last.table.row = TRUE, ... ) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" Monotonic Effects\n", ncol(x))) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sp.content) } # zero inflation part here ---- if (!is.null(zeroinf)) { rem <- 1:nrow(x) zero.part <- suppressMessages( x %>% dplyr::full_join(zeroinf) %>% dplyr::slice(!! 
-rem) %>% sjmisc::replace_na(value = "") ) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" Zero-Inflated Model\n", ncol(x))) page.content <- paste0(page.content, " \n") zero.content <- tab_df_content( mydf = zero.part, title = NULL, footnote = NULL, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, show.header = FALSE, altr.row.col = FALSE, sort.column = NULL, include.table.tag = FALSE, no.last.table.row = TRUE, zeroinf = FALSE, ... ) page.content <- paste0(page.content, zero.content) } # prepare column span for summary information, including CSS summary.css <- "tdata summary summarydata" firstsumrow <- TRUE # add random effects ---- if (!is_empty_list(variance.list) && show.re.var) { page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" Random Effects\n", ncol(x))) page.content <- paste0(page.content, " \n") ## random effects: within-group-variance: sigma ---- s_css <- "tdata leftalign summary" page.content <- paste0(page.content, sprintf("\n \n σ2\n", s_css)) s_css <- summary.css for (i in 1:length(variance.list)) { if (length(variance.list) == 1) colspan <- ncol(x) - 1 else colspan <- length(string_ends_with(sprintf("_%i", i), x = colnames(x))) if (is.null(variance.list[[i]])) { page.content <- paste0( page.content, sprintf("  \n", s_css, as.integer(colspan)) ) } else { page.content <- paste0( page.content, sprintf( " %.*f\n", s_css, as.integer(colspan), digits.re, variance.list[[i]]$var.residual ) ) } } page.content <- paste0(page.content, " \n") # random effects: Between-group-variance: tau.00 ---- tau00 <- purrr::map(variance.list, ~ .x$var.intercept) tau00.len <- max(purrr::map_dbl(tau00, length)) page.content <- paste0( page.content, create_random_effects( rv.len = tau00.len, rv = tau00, rv.string = "τ00", clean.rv = "tau.00", var.names = colnames(x), summary.css = summary.css, n.cols = ncol(x), digits.re = digits.re )) # random effects: random-slope-variance: tau11 ---- has_rnd_slope <- purrr::map_lgl(variance.list, ~ !is.null(.x$var.slope)) if (any(has_rnd_slope)) { tau11 <- purrr::map(variance.list, ~ .x$var.slope) tau11.len <- max(purrr::map_dbl(tau11, length)) page.content <- paste0( page.content, create_random_effects( rv.len = tau11.len, rv = tau11, rv.string = "τ11", clean.rv = "tau.11", var.names = colnames(x), summary.css = summary.css, n.cols = ncol(x), digits.re = digits.re )) rho01 <- purrr::map(variance.list, ~ .x$cor.slope_intercept) rho01.len <- max(purrr::map_dbl(rho01, length)) page.content <- paste0( page.content, create_random_effects( rv.len = rho01.len, rv = rho01, rv.string = "ρ01", clean.rv = "rho.01", var.names = colnames(x), summary.css = summary.css, n.cols = ncol(x), digits.re = digits.re )) } } # add ICC ---- if (!is_empty_list(icc.list) && show.icc) { page.content <- paste0( page.content, create_random_effects( rv.len = 1, rv = icc.list, rv.string = "ICC", clean.rv = "icc", var.names = colnames(x), summary.css = summary.css, n.cols = ncol(x), delim = ".adjusted", digits.re = digits.re )) } if (!is_empty_list(ngrps.list)) { ngrps.len <- max(purrr::map_dbl(ngrps.list, length)) page.content <- paste0( page.content, create_random_effects( rv.len = ngrps.len, rv = ngrps.list, rv.string = "N", clean.rv = "", var.names = colnames(x), summary.css = summary.css, n.cols = ncol(x), delim = "ngrps.", as_int = TRUE )) } # add no of observations ---- if (!is_empty_list(n_obs.list)) { # find first name occurence s_css <- "tdata leftalign summary" if 
(firstsumrow) s_css <- paste0(s_css, " firstsumrow") page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" Observations\n", s_css)) # print all r-squared to table s_css <- summary.css if (firstsumrow) s_css <- paste0(s_css, " firstsumrow") for (i in 1:length(n_obs.list)) { if (length(n_obs.list) == 1) colspan <- ncol(x) - 1 else colspan <- length(string_ends_with(sprintf("_%i", i), x = colnames(x))) if (is.null(n_obs.list[[i]])) { page.content <- paste0( page.content, sprintf(" NA\n", s_css, colspan) ) } else { page.content <- paste0( page.content, sprintf( " %i\n", s_css, as.integer(colspan), as.integer(n_obs.list[[i]]) ) ) } } firstsumrow <- FALSE page.content <- paste0(page.content, " \n") } # add r-squared ---- if (!is_empty_list(rsq.list)) { # find first name occurence for (i in 1:length(rsq.list)) { if (!is.null(rsq.list[[i]])) { rname <- names(rsq.list[[i]][1]) if (length(rsq.list[[i]]) > 1) rname <- sprintf("%s / %s", rname, names(rsq.list[[i]][2])) break } } # superscript 2 s_css <- "tdata leftalign summary" if (firstsumrow) s_css <- paste0(s_css, " firstsumrow") rname <- gsub("R2", "R2", rname, fixed = TRUE) rname <- gsub("R-squared", "R2", rname, fixed = TRUE) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" %s\n", s_css, rname)) # print all r-squared to table s_css <- summary.css if (firstsumrow) s_css <- paste0(s_css, " firstsumrow") for (i in 1:length(rsq.list)) { if (length(rsq.list) == 1) colspan <- ncol(x) - 1 else colspan <- length(string_ends_with(sprintf("_%i", i), x = colnames(x))) if (is.null(rsq.list[[i]]) || all(is.na(rsq.list[[i]])) || all(sjmisc::is_empty(rsq.list[[i]], first.only = FALSE))) { page.content <- paste0( page.content, sprintf(" NA\n", s_css, as.integer(colspan)) ) } else if (length(rsq.list[[i]]) > 1) { page.content <- paste0( page.content, sprintf( " %.*f / %.*f\n", s_css, as.integer(colspan), digits.rsq, rsq.list[[i]][[1]], digits.rsq, rsq.list[[i]][[2]] ) ) } else { page.content <- paste0( page.content, sprintf( " %.*f\n", s_css, as.integer(colspan), digits.rsq, rsq.list[[i]][[1]] ) ) } } firstsumrow <- FALSE page.content <- paste0(page.content, " \n") } # add deviance ---- if (!is_empty_list(dev.list)) { page.content <- paste0(page.content, create_stats( data.list = dev.list, data.string = "Deviance", firstsumrow = firstsumrow, summary.css = summary.css, var.names = colnames(x), n.cols = ncol(x) )) firstsumrow <- FALSE } # add aic ---- if (!is_empty_list(aic.list)) { page.content <- paste0(page.content, create_stats( data.list = aic.list, data.string = "AIC", firstsumrow = firstsumrow, summary.css = summary.css, var.names = colnames(x), n.cols = ncol(x) )) firstsumrow <- FALSE } # add aicc ---- if (!is_empty_list(aicc.list)) { page.content <- paste0(page.content, create_stats( data.list = aicc.list, data.string = "AICc", firstsumrow = firstsumrow, summary.css = summary.css, var.names = colnames(x), n.cols = ncol(x) )) firstsumrow <- FALSE } # add logLik ---- if (!is_empty_list(loglik.list)) { page.content <- paste0(page.content, create_stats( data.list = loglik.list, data.string = "log-Likelihood", firstsumrow = firstsumrow, summary.css = summary.css, var.names = colnames(x), n.cols = ncol(x) )) firstsumrow <- FALSE } ## TODO add bottom table border # add optional "footnote" row ---- if (!is.null(footnote)) { page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" %s\n", ncol(x), footnote)) page.content <- paste0(page.content, "\n") } 
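  # (at this point `page.content` already holds the coefficient rows, the optional
  #  zero-inflation, random-effects and footnote parts, and all summary rows
  #  (observations, R2, deviance, AIC/AICc, log-likelihood); only the caption and
  #  the enclosing table tag are still missing and get added below)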
# add table-caption ---- if (!is.null(title)) table.caption <- sprintf("%s\n", title) else table.caption <- "" # surround output with table-tag ---- page.content <- paste0("\n", table.caption, page.content, "\n
\n") # create HTML page with header information page.complete <- tab_create_page( style = style, content = page.content, encoding = encoding ) # replace CSS-style to inline, needed for knitr documents knitr <- tab_df_knitr(CSS = CSS, content = page.content, ...) # remove spaces knitr <- rmspc(knitr) page.content <- rmspc(page.content) structure( class = c("sjTable"), list( page.style = style, page.content = page.content, page.complete = page.complete, knitr = knitr, file = file, viewer = use.viewer ) ) } create_random_effects <- function(rv.len, rv, rv.string, clean.rv, var.names, summary.css, n.cols, delim = "_", as_int = FALSE, digits.re = 2) { page.content <- "" pattern <- paste0("^", clean.rv, delim) for (i in 1:rv.len) { s_css <- "tdata leftalign summary" rvs <- rv.string rv.name <- gsub(pattern, "", names(rv[[1]][i])) if (length(rv) == 1 && !sjmisc::is_empty(rv.name)) rvs <- sprintf("%s %s", rv.string, rv.name) else if (i > 1) rvs <- "" page.content <- paste0( page.content, sprintf("\n \n %s\n", s_css, rvs) ) s_css <- summary.css for (j in 1:length(rv)) { if (length(rv) == 1) colspan <- n.cols - 1 else colspan <- length(string_ends_with(sprintf("_%i", j), x = var.names)) if (is.null(rv[[j]]) || is.na(rv[[j]][i]) || sjmisc::is_empty(rv[[j]][i])) { page.content <- paste0( page.content, sprintf("  \n", s_css, as.integer(colspan)) ) } else { rv.name <- gsub(pattern, "", names(rv[[j]][i])) if (length(rv) > 1 && !sjmisc::is_empty(rv.name)) suffix <- sprintf(" %s", rv.name) else suffix <- "" if (as_int) { page.content <- paste0( page.content, sprintf( " %i%s\n", s_css, as.integer(colspan), as.integer(rv[[j]][i]), suffix ) ) } else { page.content <- paste0( page.content, sprintf( " %.*f%s\n", s_css, as.integer(colspan), digits.re, rv[[j]][i], suffix ) ) } } } } page.content } create_stats <- function(data.list, data.string, firstsumrow, summary.css, var.names, n.cols) { page.content <- "" s_css <- "tdata leftalign summary" if (firstsumrow) s_css <- paste0(s_css, " firstsumrow") page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" %s\n", s_css, data.string)) # print all r-squared to table s_css <- summary.css if (firstsumrow) s_css <- paste0(s_css, " firstsumrow") for (i in 1:length(data.list)) { if (length(data.list) == 1) colspan <- n.cols - 1 else colspan <- length(string_ends_with(sprintf("_%i", i), x = var.names)) if (is.null(data.list[[i]])) { page.content <- paste0( page.content, sprintf("  \n", s_css, as.integer(colspan)) ) } else { page.content <- paste0( page.content, sprintf( " %.3f\n", s_css, as.integer(colspan), data.list[[i]] ) ) } } paste0(page.content, " \n") } sjPlot/R/utils.R0000644000176200001440000002775313624216706013236 0ustar liggesusersdata_frame <- function(...) { x <- data.frame(..., stringsAsFactors = FALSE) rownames(x) <- NULL x } # do we have a stan-model? 
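# (i.e. objects inheriting from "stanreg" (rstanarm), "stanfit" (rstan) or
#  "brmsfit" (brms); for example, is.stan() returns TRUE for a model fitted with
#  rstanarm::stan_glm() or brms::brm(), and FALSE for a plain lm() fit)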
is.stan <- function(x) inherits(x, c("stanreg", "stanfit", "brmsfit")) #' @importFrom sjmisc is_empty #' @importFrom dplyr n_distinct stan.has.multiranef <- function(x) { if (obj_has_name(x, "facet")) { ri <- string_starts_with("(Intercept", x = x$facet) if (!sjmisc::is_empty(ri)) { return(dplyr::n_distinct(x$facet[ri]) > 1) } } FALSE } has_value_labels <- function(x) { !(is.null(attr(x, "labels", exact = T)) && is.null(attr(x, "value.labels", exact = T))) } #' @importFrom grDevices axisTicks #' @importFrom dplyr if_else #' @importFrom sjmisc is_empty axis_limits_and_ticks <- function(axis.lim, min.val, max.val, grid.breaks, exponentiate, min.est, max.est) { # factor to multiply the axis limits. for exponentiated scales, # these need to be large enough to find appropriate pretty numbers fac.ll <- dplyr::if_else(exponentiate, .3, .95) fac.ul <- dplyr::if_else(exponentiate, 3.3, 1.05) # check for correct boundaries if (is.infinite(min.val) || is.na(min.val)) min.val <- min.est if (is.infinite(max.val) || is.na(max.val)) max.val <- max.est # for negative signs, need to change multiplier if (min.val < 0) fac.ll <- 1 / fac.ll if (max.val < 0) fac.ul <- 1 / fac.ul # axis limits if (is.null(axis.lim)) { lower_lim <- min.val * fac.ll upper_lim <- max.val * fac.ul } else { lower_lim <- axis.lim[1] upper_lim <- axis.lim[2] } # determine gridbreaks if (is.null(grid.breaks)) { if (exponentiate) { # make sure we have nice x-positions for breaks lower_lim <- round(lower_lim, 2) upper_lim <- round(upper_lim, 2) # for *very* small values, lower_lim might be zero, so # correct value here. else we have Inf as limit if (lower_lim == 0) lower_lim <- min.val * fac.ll / 10 # use pretty distances for log-scale ls <- log10(c(lower_lim, upper_lim)) ticks <- grDevices::axisTicks(c(floor(ls[1]), ceiling(ls[2])), log = TRUE) # truncate ticks to highest value below lower lim and # lowest value above upper lim ll <- which(ticks < lower_lim) if (!sjmisc::is_empty(ll) && length(ll) > 1) ticks <- ticks[ll[length(ll)]:length(ticks)] ul <- which(ticks > upper_lim) if (!sjmisc::is_empty(ul) && length(ul) > 1) ticks <- ticks[1:ul[1]] } else { ticks <- pretty(c(floor(lower_lim), ceiling(upper_lim))) } } else { if (length(grid.breaks) == 1) ticks <- seq(floor(lower_lim), ceiling(upper_lim), by = grid.breaks) else ticks <- grid.breaks } # save proper axis limits list(axis.lim = c(min(ticks), max(ticks)), ticks = ticks) } #' @importFrom insight model_info #' @importFrom dplyr case_when estimate_axis_title <- function(fit, axis.title, type, transform = NULL, multi.resp = NULL, include.zeroinf = FALSE) { # no automatic title for effect-plots if (type %in% c("eff", "pred", "int")) return(axis.title) # check default label and fit family if (is.null(axis.title)) { fitfam <- insight::model_info(fit) if (!is.null(multi.resp)) fitfam <- fitfam[[multi.resp]] else if (insight::is_multivariate(fit)) fitfam <- fitfam[[1]] axis.title <- dplyr::case_when( !is.null(transform) && transform == "plogis" ~ "Probabilities", is.null(transform) && fitfam$is_binomial ~ "Log-Odds", is.null(transform) && fitfam$is_ordinal ~ "Log-Odds", is.null(transform) && fitfam$is_multinomial ~ "Log-Odds", is.null(transform) && fitfam$is_categorical ~ "Log-Odds", is.null(transform) && fitfam$is_count ~ "Log-Mean", fitfam$is_count ~ "Incidence Rate Ratios", fitfam$is_ordinal ~ "Odds Ratios", fitfam$is_multinomial ~ "Odds Ratios", fitfam$is_categorical ~ "Odds Ratios", fitfam$is_binomial && !fitfam$is_logit ~ "Risk Ratios", fitfam$is_binomial ~ "Odds Ratios", TRUE ~ 
"Estimates" ) if (fitfam$is_zero_inflated && isTRUE(include.zeroinf)) { if (is.null(transform)) axis.title <- c(axis.title, "Log-Odds") else axis.title <- c(axis.title, "Odds Ratios") } } axis.title } #' @importFrom dplyr case_when get_p_stars <- function(pval, thresholds = NULL) { if (is.null(thresholds)) thresholds <- c(.05, .01, .001) dplyr::case_when( is.na(pval) ~ "", pval < thresholds[3] ~ "***", pval < thresholds[2] ~ "**", pval < thresholds[1] ~ "*", TRUE ~ "" ) } is_merMod <- function(fit) { inherits(fit, c("lmerMod", "glmerMod", "nlmerMod", "merModLmerTest")) } is_brms_mixed <- function(fit) { inherits(fit, "brmsfit") && !sjmisc::is_empty(fit$ranef) } # short checker so we know if we need more summary statistics like ICC #' @importFrom insight model_info is_multivariate is_mixed_model <- function(fit) { mi <- insight::model_info(fit) if (insight::is_multivariate(fit)) mi[[1]]$is_mixed else mi$is_mixed } nulldef <- function(x, y, z = NULL) { if (is.null(x)) { if (is.null(y)) z else y } else x } geom_intercept_line <- function(yintercept, axis.scaling, vline.color) { if (yintercept > axis.scaling$axis.lim[1] && yintercept < axis.scaling$axis.lim[2]) { t <- theme_get() if (is.null(t$panel.grid.major)) t$panel.grid.major <- t$panel.grid color <- nulldef(vline.color, t$panel.grid.major$colour, "grey90") minor_size <- nulldef(t$panel.grid.minor$size, .125) major_size <- nulldef(t$panel.grid.major$size, minor_size * 1.5) size <- major_size * 1.5 geom_hline(yintercept = yintercept, color = color, size = size) } else { NULL } } # same as above, but no check if intercept is within boundaries or not geom_intercept_line2 <- function(yintercept, vline.color) { t <- theme_get() if (is.null(t$panel.grid.major)) t$panel.grid.major <- t$panel.grid color <- nulldef(vline.color, t$panel.grid.major$colour, "grey90") minor_size <- nulldef(t$panel.grid.minor$size, .125) major_size <- nulldef(t$panel.grid.major$size, minor_size * 1.5) size <- major_size * 1.5 geom_hline(yintercept = yintercept, color = color, size = size) } check_se_argument <- function(se, type = NULL) { if (!is.null(se) && !is.null(type) && type %in% c("std", "std2")) { warning("No robust standard errors for `type = \"std\"` or `type = \"std2\"`.") se <- NULL } if (!is.null(se) && !is.null(type) && type == "re") { warning("No robust standard errors for `type = \"re\"`.") se <- NULL } se } list.depth <- function(this, thisdepth = 0) { # http://stackoverflow.com/a/13433689/1270695 if (!is.list(this)) { return(thisdepth) } else { return(max(unlist(lapply(this, list.depth, thisdepth = thisdepth + 1)))) } } #' @importFrom purrr map flatten_chr #' @importFrom sjmisc is_empty trim parse_terms <- function(x) { if (sjmisc::is_empty(x)) return(x) # get variable with suffix vars.pos <- which(as.vector(regexpr( pattern = " ([^\\]]*)\\]", text = x, perl = T )) != -1) # is empty? if (sjmisc::is_empty(vars.pos)) return(x) # get variable names. 
needed later to set as # names attributes vars.names <- clear_terms(x)[vars.pos] # get levels inside brackets tmp <- unlist(regmatches( x, gregexpr( pattern = " ([^\\]]*)\\]", text = x, perl = T ) )) # remove brackets tmp <- gsub("(\\[*)(\\]*)", "", tmp) # see if we have multiple values, split at comma tmp <- sjmisc::trim(strsplit(tmp, ",", fixed = T)) parsed.terms <- seq_len(length(tmp)) %>% purrr::map(~ sprintf("%s%s", vars.names[.x], tmp[[.x]])) %>% purrr::flatten_chr() c(x[-vars.pos], parsed.terms) } #' @importFrom sjmisc trim clear_terms <- function(x) { # get positions of variable names and see if we have # a suffix for certain values cleaned.pos <- regexpr(pattern = "\\s", x) # position "-1" means we only had variable name, no suffix replacers <- which(cleaned.pos == -1) # replace -1 with number of chars cleaned.pos[replacers] <- nchar(x)[replacers] # get variable names only sjmisc::trim(substr(x, 0, cleaned.pos)) } #' @importFrom purrr map_lgl #' @importFrom sjmisc is_empty is_empty_list <- function(x) { all(purrr::map_lgl(x, sjmisc::is_empty)) } model_deviance <- function(x) { tryCatch( { m_deviance(x) }, error = function(x) { NULL } ) } #' @importFrom performance performance_aic model_aic <- function(x) { performance::performance_aic(x) } #' @importFrom performance performance_aicc model_aicc <- function(x) { tryCatch( { performance::performance_aicc(x) }, error = function(x) { NULL } ) } #' @importFrom stats logLik model_loglik <- function(x) { tryCatch( { stats::logLik(x) }, error = function(x) { NULL } ) } #' @importFrom stats deviance m_deviance <- function(x) { if (is_merMod(x)) { if (!requireNamespace("lme4", quietly = TRUE)) { stop("Package 'lme4' required for this function to work, please install it.") } d <- lme4::getME(x, "devcomp")$cmp["dev"] if (is.na(d)) d <- stats::deviance(x, REML = FALSE) } else { d <- stats::deviance(x) } d } #' @importFrom purrr map as_vector tidy_label <- function(labs, sep = ".") { # create table, and check if any value label is duplicated duped.val <- names(which(table(labs) > 1)) # find position of duplicated labels dupes <- duped.val %>% purrr::map(~which(labs == .x)) %>% purrr::as_vector(.type = "double") # prefix labels with value labs[dupes] <- sprintf("%s%s%s", labs[dupes], sep, dupes) labs } #' @importFrom purrr map_df #' @importFrom insight find_random se_ranef <- function(object) { if (!requireNamespace("lme4", quietly = TRUE)) { stop("Package 'lme4' required for this function to work, please install it.") } if (inherits(object, "MixMod")) { se.bygroup <- lme4::ranef(object, post_vars = TRUE) vars.m <- attr(se.bygroup, "post_vars") if (dim(vars.m[[1]])[1] == 1) se.bygroup <- sqrt(unlist(vars.m)) else { se.bygroup <- do.call( rbind, purrr::map_df(vars.m, ~ t(as.data.frame(sqrt(diag(.x))))) ) dimnames(se.bygroup)[[2]] <- dimnames(vars.m[[1]])[[1]] se.bygroup <- list(se.bygroup) names(se.bygroup) <- insight::find_random(object, flatten = TRUE) } } else { se.bygroup <- lme4::ranef(object, condVar = TRUE) n.groupings <- length(se.bygroup) for (m in 1:n.groupings) { vars.m <- attr(se.bygroup[[m]], "postVar") K <- dim(vars.m)[1] J <- dim(vars.m)[3] names.full <- dimnames(se.bygroup[[m]]) se.bygroup[[m]] <- array(NA, c(J, K)) for (j in 1:J) { se.bygroup[[m]][j, ] <- sqrt(diag(as.matrix(vars.m[, , j]))) } dimnames(se.bygroup[[m]]) <- list(names.full[[1]], names.full[[2]]) } } se.bygroup } #' @importFrom insight n_obs get_observations <- function(model) { tryCatch( { insight::n_obs(model) }, error = function(x) { NULL } ) } #' @importFrom 
insight find_predictors get_data .labelled_model_data <- function(models) { # to be generic, make sure argument is a list if (!inherits(models, "list")) models <- list(models) # get model terms and model frame mf <- try(lapply(models, function(.x) insight::get_data(.x)[, -1, drop = FALSE]), silent = TRUE) # return NULL on error if (inherits(mf, "try-error")) { return(FALSE) } # get all variable labels for predictors lbs <- unlist(lapply(mf, function(x) { any(sapply(x, function(i) !is.null(attributes(i)$label))) })) any(lbs) } sjPlot/R/zzz.R0000644000176200001440000000071713446531454012724 0ustar liggesusers.onAttach <- function(libname, pkgname) { if (stats::runif(1) > .8) { packageStartupMessage("Learn more about sjPlot with 'browseVignettes(\"sjPlot\")'.") } else if (stats::runif(1) > .8) { packageStartupMessage("Install package \"strengejacke\" from GitHub (`devtools::install_github(\"strengejacke/strengejacke\")`) to load all sj-packages at once!") } else if (stats::runif(1) > .8) { packageStartupMessage("#refugeeswelcome") } } sjPlot/R/sjPlotCorr.R0000644000176200001440000002433413662304072014163 0ustar liggesusers#' @title Plot correlation matrix #' @name sjp.corr #' #' @description Plot correlation matrix as ellipses or tiles. #' #' @param data Matrix with correlation coefficients as returned by the #' \code{\link{cor}}-function, or a \code{data.frame} of variables where #' correlations between columns should be computed. #' @param sort.corr Logical, if \code{TRUE} (default), the axis labels are sorted #' according to the correlation strength. If \code{FALSE}, axis labels #' appear in order of how variables were included in the cor-computation or #' data frame. #' @param decimals Indicates how many decimal values after comma are printed when #' the values labels are shown. Default is 3. Only applies when #' \code{show.values = TRUE}. #' @param na.deletion Indicates how missing values are treated. May be either #' \code{"listwise"} (default) or \code{"pairwise"}. May be #' abbreviated. #' @param corr.method Indicates the correlation computation method. May be one of #' \code{"pearson"} (default), \code{"spearman"} or \code{"kendall"}. #' May be abbreviated. #' @param p.numeric Logical, if \code{TRUE}, the p-values are printed #' as numbers. If \code{FALSE} (default), asterisks are used. #' #' @inheritParams plot_grpfrq #' @inheritParams plot_gpt #' #' @return (Insisibily) returns the ggplot-object with the complete plot (\code{plot}) as well as the data frame that #' was used for setting up the ggplot-object (\code{df}) and the original correlation matrix #' (\code{corr.matrix}). #' #' @note If \code{data} is a matrix with correlation coefficients as returned by #' the \code{\link{cor}}-function, p-values can't be computed. #' Thus, \code{show.p} and \code{p.numeric} #' only have an effect if \code{data} is a \code{\link{data.frame}}. #' #' @details Required argument is either a \code{\link{data.frame}} or a matrix with correlation coefficients #' as returned by the \code{\link{cor}}-function. In case of ellipses, the #' ellipses size indicates the strength of the correlation. Furthermore, #' blue and red colors indicate positive or negative correlations, where #' stronger correlations are darker. 
#' #' @import ggplot2 #' @importFrom tidyr gather #' @importFrom scales brewer_pal grey_pal #' @importFrom stats cor cor.test na.omit #' @export sjp.corr <- function(data, title = NULL, axis.labels = NULL, sort.corr = TRUE, decimals = 3, na.deletion = c("listwise", "pairwise"), corr.method = c("pearson", "spearman", "kendall"), geom.colors = "RdBu", wrap.title = 50, wrap.labels = 20, show.legend = FALSE, legend.title = NULL, show.values = TRUE, show.p = TRUE, p.numeric = FALSE) { .Deprecated(msg = "'sjp.corr' is deprecated. Please use 'correlation::correlation()' and its related plot()-method.") # -------------------------------------------------------- # check p-value-style option # -------------------------------------------------------- opt <- getOption("p_zero") if (is.null(opt) || opt == FALSE) { p_zero <- "" } else { p_zero <- "0" } # -------------------------------------------------------- # check args # -------------------------------------------------------- na.deletion <- match.arg(na.deletion) corr.method <- match.arg(corr.method) # -------------------------------------------------------- # try to automatically set labels is not passed as argument # -------------------------------------------------------- if (is.null(axis.labels) && is.data.frame(data)) { axis.labels <- unname(sjlabelled::get_label(data, def.value = colnames(data))) } # ---------------------------- # set color palette # ---------------------------- if (is.brewer.pal(geom.colors[1])) { geom.colors <- scales::brewer_pal(palette = geom.colors[1])(5) } else if (geom.colors[1] == "gs") { geom.colors <- scales::grey_pal()(5) } # ---------------------------- # check if user has passed a data frame # or a pca object # ---------------------------- if (any(class(data) == "matrix")) { corr <- data cpvalues <- NULL } else { # missing deletion corresponds to # SPSS listwise if (na.deletion == "listwise") { data <- stats::na.omit(data) corr <- stats::cor(data, method = corr.method) } # missing deletion corresponds to # SPSS pairwise else { corr <- stats::cor(data, method = corr.method, use = "pairwise.complete.obs") } #--------------------------------------- # if we have a data frame as argument, # compute p-values of significances #--------------------------------------- computePValues <- function(df) { cp <- c() for (i in seq_len(ncol(df))) { pv <- c() for (j in seq_len(ncol(df))) { test <- suppressWarnings(stats::cor.test(df[[i]], df[[j]], alternative = "two.sided", method = corr.method)) pv <- c(pv, round(test$p.value, 4)) } cp <- rbind(cp, pv) } return(cp) } cpvalues <- computePValues(data) } # ---------------------------- # check if user defined labels have been supplied # if not, use variable names from data frame # ---------------------------- if (is.null(axis.labels)) axis.labels <- row.names(corr) # ---------------------------- # Prepare length of title and labels # ---------------------------- # check length of diagram title and split longer string at into new lines if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title) # check length of x-axis-labels and split longer strings at into new lines if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # -------------------------------------------------------- # order correlations from highest to lowest correlation coefficient # -------------------------------------------------------- if (sort.corr) { neword <- order(corr[1, ]) orderedCorr <- corr[neword, neword] # order variable labels as well axis.labels <- 
axis.labels[neword] if (!is.null(cpvalues)) cpvalues <- cpvalues[neword, neword] } else { cl <- ncol(corr) orderedCorr <- corr[cl:1, cl:1] axis.labels <- rev(axis.labels) if (!is.null(cpvalues)) cpvalues <- cpvalues[cl:1, cl:1] } # -------------------------------------------------------- # prepare a ordering-index-column needed for the data frame # that is passed to the ggplot # -------------------------------------------------------- yo <- c() for (i in seq_len(nrow(corr))) { yo <- c(yo, rep(i, nrow(corr))) } # -------------------------------------------------------- # melt correlation matrix and create data frame # -------------------------------------------------------- orderedCorr <- tidyr::gather(data.frame(orderedCorr), "var", "value", !! seq_len(ncol(orderedCorr)), factor_key = TRUE) # orderedCorr <- melt(orderedCorr) if (!is.null(cpvalues)) cpvalues <- tidyr::gather(data.frame(cpvalues), "var", "value", !! seq_len(ncol(cpvalues)), factor_key = TRUE) # if (!is.null(cpvalues)) cpvalues <- melt(cpvalues) # bind additional information like order for x- and y-axis # as well as the size of plotted points orderedCorr <- cbind(orderedCorr, ordx = seq_len(nrow(corr)), ordy = yo) # -------------------------------------------------------- # add column with significance value # -------------------------------------------------------- if (!is.null(cpvalues)) { if (!p.numeric) { cpv <- sapply(cpvalues$value, get_p_stars) } else { cpv <- sapply(cpvalues$value, function(x) { if (x < 0.001) x <- sprintf("\n(< %s.001)", p_zero) else x <- sub("0", p_zero, sprintf("\n(%.*f)", decimals, x)) }) } } else { cpv <- "" } orderedCorr$ps <- cpv # -------------------------------------------------------- # set visibility of labels # -------------------------------------------------------- if (!show.values) { orderedCorr$val.labels <- "" } else { if (show.p) { orderedCorr$val.labels <- sprintf("%.*f%s", decimals, orderedCorr$value, orderedCorr$ps) } else { orderedCorr$val.labels <- sprintf("%.*f", decimals, orderedCorr$value) } } orderedCorr$val.labels[orderedCorr$ordx >= orderedCorr$ordy] <- NA orderedCorr$ordx <- as.factor(orderedCorr$ordx) orderedCorr$ordy <- as.factor(orderedCorr$ordy) message(sprintf("Computing correlation using %s-method with %s-deletion...", corr.method, na.deletion)) # -------------------------------------------------------- # start with base plot object here # -------------------------------------------------------- corrPlot <- ggplot(orderedCorr, aes_string(x = "ordx", y = "ordy", fill = "value", label = "val.labels")) + geom_tile(size = 0, colour = "black") + # fill gradient colour from distinct color brewer palette. negative correlations are dark # red, positive corr. are dark blue, and they become lighter the closer they are to a # correlation coefficient of zero scale_x_discrete(labels = axis.labels, breaks = seq_len(length(axis.labels))) + scale_y_discrete(labels = axis.labels, breaks = seq_len(length(axis.labels))) + # set limits to (-1,1) to make sure the whole color palette is used. 
this # is the colour scale for geoms scale_fill_gradientn(colours = geom.colors, limits = c(-1,1)) + geom_text(size = 3.5, colour = "black") + labs(title = title, x = NULL, y = NULL) if (show.legend) corrPlot <- corrPlot + guides(fill = legend.title) else corrPlot <- corrPlot + guides(fill = "none") corrPlot } sjPlot/R/plot_grpfrq.R0000644000176200001440000010626113776334277014441 0ustar liggesusers#' @title Plot grouped or stacked frequencies #' @name plot_grpfrq #' #' @description Plot grouped or stacked frequencies of variables as bar/dot, #' box or violin plots, or line plot. #' #' @param var.cnt Vector of counts, for which frequencies or means will be plotted or printed. #' @param var.grp Factor with the cross-classifying variable, where \code{var.cnt} #' is grouped into the categories represented by \code{var.grp}. #' @param weight.by Vector of weights that will be applied to weight all cases. #' Must be a vector of same length as the input vector. Default is #' \code{NULL}, so no weights are used. #' @param title.wtd.suffix Suffix (as string) for the title, if \code{weight.by} is specified, #' e.g. \code{title.wtd.suffix=" (weighted)"}. Default is \code{NULL}, so #' title will not have a suffix when cases are weighted. #' @param intr.var An interaction variable which can be used for box plots. Divides each category indicated #' by \code{var.grp} into the factors of \code{intr.var}, so that each category of \code{var.grp} #' is subgrouped into \code{intr.var}'s categories. Only applies when #' \code{type = "boxplot"} or \code{type = "violin"}. #' @param bar.pos Indicates whether bars should be positioned side-by-side (default), #' or stacked (\code{bar.pos = "stack"}). May be abbreviated. #' @param type Specifies the plot type. May be abbreviated. #' \describe{ #' \item{\code{"bar"}}{for simple bars (default)} #' \item{\code{"dot"}}{for a dot plot} #' \item{\code{"histogram"}}{for a histogram (does not apply to grouped frequencies)} #' \item{\code{"line"}}{for a line-styled histogram with filled area} #' \item{\code{"density"}}{for a density plot (does not apply to grouped frequencies)} #' \item{\code{"boxplot"}}{for box plot} #' \item{\code{"violin"}}{for violin plots} #' } #' @param show.legend logical, if \code{TRUE}, and depending on plot type and #' function, a legend is added to the plot. #' @param ylim numeric vector of length two, defining lower and upper axis limits #' of the y scale. By default, this argument is set to \code{NULL}, i.e. the #' y-axis fits to the required range of the data. #' @param facet.grid \code{TRUE} to arrange the lay out of of multiple plots #' in a grid of an integrated single plot. This argument calls #' \code{\link[ggplot2]{facet_wrap}} or \code{\link[ggplot2]{facet_grid}} #' to arrange plots. Use \code{\link{plot_grid}} to plot multiple plot-objects #' as an arranged grid with \code{\link[gridExtra]{grid.arrange}}. #' @param title character vector, used as plot title. Depending on plot type and function, #' will be set automatically. If \code{title = ""}, no title is printed. #' For effect-plots, may also be a character vector of length > 1, #' to define titles for each sub-plot or facet. #' @param legend.title character vector, used as title for the plot legend. #' @param axis.labels character vector with labels used as axis labels. Optional #' argument, since in most cases, axis labels are set automatically. #' @param intr.var.labels a character vector with labels for the x-axis breaks #' when having interaction variables included. 
#' These labels replace the \code{axis.labels}. Only applies, when using box or violin plots #' (i.e. \code{type = "boxplot"} or \code{"violin"}) and \code{intr.var} is not \code{NULL}. #' @param legend.labels character vector with labels for the guide/legend. #' @param wrap.title numeric, determines how many chars of the plot title are displayed in #' one line and when a line break is inserted. #' @param wrap.labels numeric, determines how many chars of the value, variable or axis #' labels are displayed in one line and when a line break is inserted. #' @param wrap.legend.title numeric, determines how many chars of the legend's title #' are displayed in one line and when a line break is inserted. #' @param wrap.legend.labels numeric, determines how many chars of the legend labels are #' displayed in one line and when a line break is inserted. #' @param grid.breaks numeric; sets the distance between breaks for the axis, #' i.e. at every \code{grid.breaks}'th position a major grid is being printed. #' @param inner.box.width width of the inner box plot that is plotted inside of violin plots. Only applies #' if \code{type = "violin"}. Default value is 0.15 #' @param inner.box.dotsize size of mean dot insie a violin or box plot. Applies only #' when \code{type = "violin"} or \code{"boxplot"}. #' @param geom.colors user defined color for geoms. See 'Details' in \code{\link{plot_grpfrq}}. #' @param geom.size size resp. width of the geoms (bar width, line thickness or point size, #' depending on plot type and function). Note that bar and bin widths mostly #' need smaller values than dot sizes. #' @param geom.spacing the spacing between geoms (i.e. bar spacing) #' @param smooth.lines prints a smooth line curve. Only applies, when argument \code{type = "line"}. #' @param expand.grid logical, if \code{TRUE}, the plot grid is expanded, i.e. there is a small margin between #' axes and plotting region. Default is \code{FALSE}. #' @param show.values Logical, whether values should be plotted or not. #' @param show.n logical, if \code{TRUE}, adds total number of cases for each #' group or category to the labels. #' @param show.axis.values logical, whether category, count or percentage values for the axis #' should be printed or not. #' @param show.prc logical, if \code{TRUE} (default), percentage values are plotted to each bar #' If \code{FALSE}, percentage values are removed. #' @param show.ci Logical, if \code{TRUE)}, adds notches to the box plot, which are #' used to compare groups; if the notches of two boxes do not overlap, #' medians are considered to be significantly different. #' @param emph.dots logical, if \code{TRUE}, the groups of dots in a dot-plot are highlighted #' with a shaded rectangle. #' @param show.summary logical, if \code{TRUE} (default), a summary with chi-squared #' statistics (see \code{\link{chisq.test}}), Cramer's V or Phi-value etc. #' is shown. If a cell contains expected values lower than five (or lower than 10 #' if df is 1), the Fisher's exact test (see \code{\link{fisher.test}}) is #' computed instead of chi-squared test. If the table's matrix is larger #' than 2x2, Fisher's exact test with Monte Carlo simulation is computed. #' @param show.grpcnt logical, if \code{TRUE}, the count within each group is added #' to the category labels (e.g. \code{"Cat 1 (n=87)"}). Default value is \code{FALSE}. #' @param summary.pos position of the model summary which is printed when \code{show.summary} #' is \code{TRUE}. Default is \code{"r"}, i.e. it's printed to the upper right corner. 
#' Use \code{"l"} for upper left corner. #' @param axis.titles character vector of length one or two, defining the title(s) #' for the x-axis and y-axis. #' @param drop.empty Logical, if \code{TRUE} and the variable's values are labeled, #' values that have no observations are still printed in the table (with #' frequency \code{0}). If \code{FALSE}, values / factor levels with no occurrence #' in the data are omitted from the output. #' @param auto.group numeric value, indicating the minimum amount of unique values #' in the count variable, at which automatic grouping into smaller units #' is done (see \code{\link[sjmisc]{group_var}}). Default value for #' \code{auto.group} is \code{NULL}, i.e. auto-grouping is off. #' See \code{\link[sjmisc]{group_var}} for examples on grouping. #' @param coord.flip logical, if \code{TRUE}, the x and y axis are swapped. #' @param vjust character vector, indicating the vertical position of value #' labels. Allowed are same values as for \code{vjust} aesthetics from #' \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and #' new options like "inward" and "outward", which align text towards and #' away from the center of the plot respectively. #' @param hjust character vector, indicating the horizontal position of value #' labels. Allowed are same values as for \code{vjust} aesthetics from #' \code{ggplot2}: "left", "center", "right", "bottom", "middle", "top" and #' new options like "inward" and "outward", which align text towards and #' away from the center of the plot respectively. #' @param y.offset numeric, offset for text labels when their alignment is adjusted #' to the top/bottom of the geom (see \code{hjust} and \code{vjust}). #' @param show.na logical, if \code{TRUE}, \code{\link{NA}}'s (missing values) #' are added to the output. #' #' @return A ggplot-object. #' #' @details \code{geom.colors} may be a character vector of color values #' in hex-format, valid color value names (see \code{demo("colors")} or #' a name of a \href{ https://colorbrewer2.org/}{color brewer} palette. #' Following options are valid for the \code{geom.colors} argument: #' \itemize{ #' \item If not specified, a default color brewer palette will be used, which is suitable for the plot style (i.e. diverging for likert scales, qualitative for grouped bars etc.). #' \item If \code{"gs"}, a greyscale will be used. #' \item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). #' \item If \code{geom.colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. #' \item Else specify own color values or names as vector (e.g. \code{geom.colors = c("#f00000", "#00ff00")}). #' } #' #' @examples #' data(efc) #' plot_grpfrq(efc$e17age, efc$e16sex, show.values = FALSE) #' #' # boxplot #' plot_grpfrq(efc$e17age, efc$e42dep, type = "box") #' #' # grouped bars #' plot_grpfrq(efc$e42dep, efc$e16sex, title = NULL) #' #' # box plots with interaction variable #' plot_grpfrq(efc$e17age, efc$e42dep, intr.var = efc$e16sex, type = "box") #' #' # Grouped bar plot #' plot_grpfrq(efc$neg_c_7, efc$e42dep, show.values = FALSE) #' #' # same data as line plot #' plot_grpfrq(efc$neg_c_7, efc$e42dep, type = "line") #' #' # show ony categories where we have data (i.e. 
drop zero-counts) #' library(dplyr) #' efc <- dplyr::filter(efc, e42dep %in% c(3,4)) #' plot_grpfrq(efc$c161sex, efc$e42dep, drop.empty = TRUE) #' #' # show all categories, even if not in data #' plot_grpfrq(efc$c161sex, efc$e42dep, drop.empty = FALSE) #' #' @import ggplot2 #' @importFrom sjstats weight2 #' @importFrom tidyr gather #' @importFrom dplyr group_by mutate arrange summarise #' @importFrom stats na.omit xtabs wilcox.test sd #' @importFrom rlang .data #' @export plot_grpfrq <- function(var.cnt, var.grp, type = c("bar", "dot", "line", "boxplot", "violin"), bar.pos = c("dodge", "stack"), weight.by = NULL, intr.var = NULL, title = "", title.wtd.suffix = NULL, legend.title = NULL, axis.titles = NULL, axis.labels = NULL, legend.labels = NULL, intr.var.labels = NULL, wrap.title = 50, wrap.labels = 15, wrap.legend.title = 20, wrap.legend.labels = 20, geom.size = NULL, geom.spacing = 0.15, geom.colors = "Paired", show.values = TRUE, show.n = TRUE, show.prc = TRUE, show.axis.values = TRUE, show.ci = FALSE, show.grpcnt = FALSE, show.legend = TRUE, show.na = FALSE, show.summary = FALSE, drop.empty = TRUE, auto.group = NULL, ylim = NULL, grid.breaks = NULL, expand.grid = FALSE, inner.box.width = 0.15, inner.box.dotsize = 3, smooth.lines = FALSE, emph.dots = TRUE, summary.pos = "r", facet.grid = FALSE, coord.flip = FALSE, y.offset = NULL, vjust = "bottom", hjust = "center") { # get variable names var.name.cnt <- get_var_name(deparse(substitute(var.cnt))) var.name.grp <- get_var_name(deparse(substitute(var.grp))) # remove empty value-labels if (drop.empty) { var.cnt <- sjlabelled::drop_labels(var.cnt) var.grp <- sjlabelled::drop_labels(var.grp) } # copy titles if (is.null(axis.titles)) { axisTitle.x <- NULL axisTitle.y <- NULL } else { axisTitle.x <- axis.titles[1] if (length(axis.titles) > 1) axisTitle.y <- axis.titles[2] else axisTitle.y <- NULL } # match arguments type <- match.arg(type) bar.pos <- match.arg(bar.pos) # turn off legend by default for facet grids if (facet.grid && missing(show.legend)) show.legend <- FALSE # Plot margins if (expand.grid) expand.grid <- waiver() else expand.grid <- c(0, 0) # check default geom.size if (is.null(geom.size)) { geom.size <- dplyr::case_when( type == "bar" ~ .7, type == "dot" ~ 3, type == "line" ~ .8, type == "boxplot" ~ .5, type == "violin" ~ .6, TRUE ~ .7 ) } # set text label offset if (is.null(y.offset)) { # get maximum y-pos y.offset <- ceiling(max(table(var.cnt, var.grp)) / 100) if (coord.flip) { if (missing(vjust)) vjust <- "center" if (missing(hjust)) hjust <- "bottom" # for flipped coordinates, we need to adjust # y-offset according to horizontal adjustemnt of labels if (hjust == "bottom") y_offset <- y.offset else if (hjust == "top") y_offset <- -y.offset else y_offset <- 0 } else { # for non-flipped coordinates, we need to adjust # y-offset according to vertical adjustemnt of labels if (vjust == "bottom") y_offset <- y.offset else if (vjust == "top") y_offset <- -y.offset else y_offset <- 0 } } else { y_offset <- y.offset } # Interaction variable defined for invalid plot type? if (!is.null(intr.var) && type != "boxplot" && type != "violin") { message("`intr.var` only applies to boxplots and violinplots (see `type`) and will be ignored.") } if (show.grpcnt && type %in% c("boxplot", "violin")) { message("`show.grpcnt` does not apply to boxplots and violinplots and will be ignored.") show.grpcnt <- FALSE } # auto-set plot title for box plots? 
if (missing(title) && (type == "boxplot" || type == "violin")) title <- NULL # check whether variable should be auto-grouped if (!is.null(auto.group) && length(unique(var.cnt)) >= auto.group) { message(sprintf( "%s has %i unique values and was grouped...", var.name.cnt, length(unique(var.cnt)) )) # check for default auto-group-size or user-defined groups agcnt <- ifelse(auto.group < 30, auto.group, 30) # group axis labels axis.labels <- sjmisc::group_labels( sjmisc::to_value(var.cnt, keep.labels = F), size = "auto", n = agcnt ) # group variable grp.var.cnt <- sjmisc::group_var( sjmisc::to_value(var.cnt, keep.labels = F), size = "auto", as.num = TRUE, n = agcnt, append = FALSE ) # set value labels grp.var.cnt <- sjlabelled::set_labels(grp.var.cnt, labels = axis.labels) } else { grp.var.cnt <- var.cnt } # create cross table of frequencies and percentages mydat <- create.xtab.df( grp.var.cnt, var.grp, round.prz = 2, na.rm = !show.na, weight.by = weight.by ) # x-position as numeric factor, added later after # tidying bars.xpos <- seq_len(nrow(mydat$mydat)) # try to automatically set labels if not passed as argument if (missing(axis.labels) && (type == "boxplot" || type == "violin")) { axis.labels <- mydat$labels.grp # if we have interaction variable, legend should be shown by default, # unless explicitely set to FALSE if (missing(show.legend)) show.legend <- !is.null(intr.var) } if (is.null(axis.labels)) axis.labels <- mydat$labels.cnt # we need to know later whether user has supplied legend labels or not we_have_legend_labels <- FALSE # check for auto-getting labels, ot if user passed legend labels as argument if (is.null(legend.labels)) legend.labels <- mydat$labels.grp else we_have_legend_labels <- TRUE # go to interaction terms. in this case, due to interaction, the axis # labels become legend labels, but only if user has not specified # legend labels yet. In the latter case, leave legend labels unchanged. if (is.null(intr.var.labels) && !is.null(intr.var)) { intr.var.labels <- sjlabelled::get_labels( intr.var, attr.only = F, values = F, non.labelled = T ) # create repeating label for x-axis intr.var.labels <- rep(intr.var.labels, length.out = length(axis.labels) * length(intr.var.labels)) # we need a legend, cause x axis is labelled with interaction var value show.legend <- TRUE # has user specified legend labels before? if (!we_have_legend_labels) legend.labels <- axis.labels } if (is.null(axisTitle.x)) axisTitle.x <- sjlabelled::get_label(var.cnt, def.value = var.name.cnt) if (is.null(legend.title)) legend.title <- sjlabelled::get_label(var.grp, def.value = var.name.grp) if (is.null(title)) { t1 <- sjlabelled::get_label(var.cnt, def.value = var.name.cnt) t2 <- sjlabelled::get_label(var.grp, def.value = var.name.grp) if (!is.null(t1) && !is.null(t2)) title <- paste0(t1, " by ", t2) } # remove titles if empty if (!is.null(legend.title) && legend.title == "") legend.title <- NULL if (!is.null(axisTitle.x) && axisTitle.x == "") axisTitle.x <- NULL if (!is.null(axisTitle.y) && axisTitle.y == "") axisTitle.y <- NULL if (!is.null(title) && title == "") title <- NULL # variables may not be factors if (anyNA(as.numeric(stats::na.omit(var.cnt)))) var.cnt <- sjmisc::to_value(var.cnt, keep.labels = F) else var.cnt <- as.numeric(var.cnt) if (anyNA(as.numeric(stats::na.omit(var.grp)))) var.grp <- sjmisc::to_value(var.grp, keep.labels = F) else var.grp <- as.numeric(var.grp) # Define amount of categories grpcount <- length(legend.labels) # create cross table for stats, summary etc. 
# and weight variable colrange <- 2:(grpcount + 1) mydf <- tidyr::gather(mydat$mydat, key = "group", value = "frq", !! colrange, factor_key = TRUE) # add xpos now mydf$xpos <- as.factor(as.numeric(bars.xpos)) # add half of Percentage values as new y-position for stacked bars # mydat <- ddply(mydat, "count", transform, ypos = cumsum(frq) - 0.5*frq) mydf <- mydf %>% dplyr::group_by(.data$label) %>% dplyr::mutate(ypos = cumsum(.data$frq) - 0.5 * .data$frq) %>% dplyr::arrange(.data$label) # add percentages mydf$prz <- round(100 * mydf$frq / sum(mydf$frq), 2) # If we have boxplots, use different data frame structure if (type == "boxplot" || type == "violin") { # weight variable w <- ifelse(is.null(weight.by), 1, weight.by) # interaction variable if (is.null(intr.var)) iav <- 1 else iav <- intr.var # new data frame for box plots mydf <- stats::na.omit(data_frame(cbind( group = var.grp, frq = var.cnt, ia = iav, wb = w ))) if (!is.null(axis.labels) && length(axis.labels) > dplyr::n_distinct(mydf$group, na.rm = TRUE)) { axis.labels <- axis.labels[na.omit(unique(mydf$group))] } mydf$ia <- as.factor(mydf$ia) mydf$group <- as.factor(mydf$group) } # create expression with model summarys. used # for plotting in the diagram later mannwhitneyu <- function(count, grp) { if (min(grp, na.rm = TRUE) == 0) grp <- grp + 1 completeString <- "" cnt <- length(unique(stats::na.omit(grp))) for (i in 1:cnt) { for (j in i:cnt) { if (i != j) { xsub <- count[which(grp == i | grp == j)] ysub <- grp[which(grp == i | grp == j)] ysub <- ysub[which(!is.na(xsub))] xsub <- as.numeric(stats::na.omit(xsub)) ysub <- as.numeric(stats::na.omit(ysub)) wt <- stats::wilcox.test(xsub ~ ysub) if (wt$p.value < 0.001) { modsum <- as.character(as.expression(substitute( p[pgrp] < pval, list(pgrp = sprintf("(%i|%i)", i, j), pval = 0.001) ))) } else { modsum <- as.character(as.expression(substitute( p[pgrp] == pval, list(pgrp = sprintf("(%i|%i)", i, j), pval = sprintf("%.3f", wt$p.value))))) } completeString <- sprintf("%s * \",\" ~ ~ %s", completeString, modsum) } } } return(paste("\"Mann-Whitney-U:\" ~ ~ ", substring(completeString, 12), sep = "")) } # Check whether table summary should be printed modsum <- NULL if (show.summary) { if (type == "boxplot" || type == "violin") modsum <- mannwhitneyu(var.cnt, var.grp) else modsum <- crosstabsum(var.cnt, var.grp, weight.by) } # Prepare and trim legend labels to appropriate size if (!is.null(legend.labels)) legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels) if (!is.null(legend.title)) legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title) if (!is.null(title)) { # if we have weighted values, say that in diagram's title if (!is.null(title.wtd.suffix)) title <- paste(title, title.wtd.suffix, sep = "") title <- sjmisc::word_wrap(title, wrap.title) } if (!is.null(axisTitle.x)) axisTitle.x <- sjmisc::word_wrap(axisTitle.x, wrap.title) if (!is.null(axisTitle.y)) axisTitle.y <- sjmisc::word_wrap(axisTitle.y, wrap.title) if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) if (!is.null(intr.var)) { if (!is.null(intr.var.labels)) { intr.var.labels <- sjmisc::word_wrap(intr.var.labels, wrap.labels) } # If interaction-variable-labels were not defined, simply set numbers from 1 to # amount of categories instead else { iavarLabLength <- length(unique(stats::na.omit(intr.var))) intr.var.labels <- 1:iavarLabLength } } # add group counts to category labels if (show.grpcnt) { nas <- ifelse(isTRUE(show.na), "ifany", "no") # check whether we have 
interaction variables or not if (!is.null(intr.var.labels)) { # retrieve group counts by converting data column # into table if (is.null(weight.by)) { gc <- table(var.grp, intr.var, useNA = nas) } else { gc <- table(sjstats::weight2(var.grp, weight.by), intr.var, useNA = nas) } # determinte loop-steps lst <- length(intr.var.labels) # iterate category labels for (i in seq_len(lst)) { # remember original label ial <- intr.var.labels[i] # add group count to each cat. label intr.var.labels[i] <- paste(ial, " (n=", gc[1, i], ")", sep = "") intr.var.labels[i + lst] <- paste(ial, " (n=", gc[2, i], ")", sep = "") } } else { sums <- unname(rowSums(mydat$mydat[, -1])) # add group count to each cat. label axis.labels <- paste(axis.labels, " (n=", sums, ")", sep = "") sums <- unname(colSums(mydat$mydat[, -1])) # add group count to each cat. label legend.labels <- paste(legend.labels, " (n=", sums, ")", sep = "") } } # Prepare bar charts trimViolin <- FALSE lower_lim <- 0 # calculate upper y-axis-range # if we have a fixed value, use this one here if (!is.null(ylim) && length(ylim) == 2) { lower_lim <- ylim[1] upper_lim <- ylim[2] } else { # if we have boxplots, we have different ranges, so we can adjust # the y axis if (type == "boxplot" || type == "violin") { # use an extra standard-deviation as limits for the y-axis when we have boxplots lower_lim <- min(var.cnt, na.rm = TRUE) - floor(stats::sd(var.cnt, na.rm = TRUE)) upper_lim <- max(var.cnt, na.rm = TRUE) + ceiling(stats::sd(var.cnt, na.rm = TRUE)) # make sure that the y-axis is not below zero if (lower_lim < 0) { lower_lim <- 0 trimViolin <- TRUE } # else calculate upper y-axis-range depending # on the amount of cases... } else if (bar.pos == "stack") { upper_lim <- max(pretty(table(grp.var.cnt) * 1.05)) } else { # ... or the amount of max. answers per category upper_lim <- max(pretty(table(grp.var.cnt, var.grp) * 1.05)) } } # align dodged position of labels to bar positions if (type == "line") posdodge <- 0 else if (type == "dot") posdodge <- geom.spacing else posdodge <- geom.size + geom.spacing # init shaded rectangles for plot ganno <- NULL # check whether we have dots or bars if (type == "dot") { # position_dodge displays dots in a dodged position so we avoid overlay here. This may lead # to a more difficult distinction of group belongings, since the dots are "horizontally spread" # over the digram. For a better overview, we can add a "PlotAnnotation" (see "emph.dots) here. 
geob <- geom_point(position = position_dodge(posdodge),size = geom.size, shape = 16) # create shaded rectangle, so we know which dots belong to the same category if (emph.dots) { ganno <- annotate( "rect", xmin = as.numeric(mydf$xpos) - 0.4, xmax = as.numeric(mydf$xpos) + 0.4, ymin = lower_lim, ymax = upper_lim, fill = "grey80", alpha = 0.1 ) } } else if (type == "bar") { if (bar.pos == "dodge") geob <- geom_bar(stat = "identity", width = geom.size, position = position_dodge(posdodge)) else geob <- geom_bar(stat = "identity", width = geom.size, position = position_stack(reverse = TRUE)) } else if (type == "line") { if (smooth.lines) geob <- geom_line(size = geom.size, stat = "smooth", method = "loess") else geob <- geom_line(size = geom.size) } else if (type == "boxplot") { geob <- geom_boxplot(width = geom.size, notch = show.ci) } else if (type == "violin") { geob <- geom_violin(trim = trimViolin, width = geom.size) } else { geob <- geom_bar(stat = "identity", position = bar.pos, width = geom.size) } # don't display value labels when we have boxplots or violin plots if (type == "boxplot" || type == "violin") show.values <- FALSE if (show.values) { # set text positioning if (facet.grid) text.pos <- "identity" else text.pos <- position_dodge(posdodge) # if we have stacked bars, we need to apply # this stacked y-position to the labels as well if (bar.pos == "stack") { if (show.prc && show.n) { ggvaluelabels <- geom_text(aes(y = .data$ypos, label = sprintf("%i\n(%.01f%%)", .data$frq, .data$prz)), show.legend = FALSE) } else if (show.n) { ggvaluelabels <- geom_text(aes(y = .data$ypos, label = sprintf("%i", .data$frq)), show.legend = FALSE) } else if (show.prc) { ggvaluelabels <- geom_text(aes(y = .data$ypos, label = sprintf("%.01f%%", .data$prz)), show.legend = FALSE) } else { ggvaluelabels <- geom_text(aes(y = .data$frq), label = "", show.legend = FALSE) } } else { # if we have dodged bars or dots, we have to use a slightly # dodged position for labels # as well, sofor better reading if (show.prc && show.n) { if (coord.flip) { ggvaluelabels <- geom_text( aes(y = .data$frq + y_offset, label = sprintf("%i (%.01f%%)", .data$frq, .data$prz)), position = text.pos, vjust = vjust, hjust = hjust, show.legend = FALSE ) } else { ggvaluelabels <- geom_text( aes(y = .data$frq + y_offset, label = sprintf("%i\n(%.01f%%)", .data$frq, .data$prz)), position = text.pos, vjust = vjust, hjust = hjust, show.legend = FALSE ) } } else if (show.n) { ggvaluelabels <- geom_text( aes(y = .data$frq + y_offset, label = sprintf("%i", .data$frq)), position = text.pos, hjust = hjust, vjust = vjust, show.legend = FALSE ) } else if (show.prc) { ggvaluelabels <- geom_text( aes(y = .data$frq + y_offset, label = sprintf("%.01f%%", .data$prz)), position = text.pos, hjust = hjust, vjust = vjust, show.legend = FALSE ) } else { ggvaluelabels <- geom_text(aes(y = .data$frq), label = "", show.legend = FALSE) } } } else { ggvaluelabels <- geom_text(aes(y = .data$frq), label = "", show.legend = FALSE) } # Set up grid breaks if (is.null(grid.breaks)) gridbreaks <- waiver() else gridbreaks <- seq(lower_lim, upper_lim, by = grid.breaks) # Print plot if (type == "line") { # line plot need numeric x-scale mydf$xpos <- sjmisc::to_value(mydf$xpos, keep.labels = FALSE) # lines need colour aes baseplot <- ggplot(mydf, aes_string( x = "xpos", y = "frq", colour = "group", linetype = "group" )) + geob # continuous scale for lines needed scalex <- scale_x_continuous() } else if (type == "boxplot" || type == "violin") { if (is.null(intr.var)) { 
baseplot <- ggplot(mydf, aes_string( x = "group", y = "frq", fill = "group", weight = "wb" )) + geob scalex <- scale_x_discrete(labels = axis.labels) } else { baseplot <- ggplot(mydf, aes( x = interaction(.data$ia, .data$group), y = .data$frq, fill = .data$group, weight = .data$wb )) + geob scalex <- scale_x_discrete(labels = intr.var.labels) } # if we have a violin plot, add an additional boxplot inside to show # more information if (type == "violin") { if (show.ci) { baseplot <- baseplot + geom_boxplot(width = inner.box.width, fill = "white", outlier.colour = NA, notch = TRUE) } else { baseplot <- baseplot + geom_boxplot(width = inner.box.width, fill = "white", outlier.colour = NA) } } # if we have boxplots or violon plots, also add a point that indicates # the mean value # different fill colours, because violin boxplots have white background fcsp <- ifelse(type == "boxplot", "white", "black") baseplot <- baseplot + stat_summary(fun.y = "mean", geom = "point", shape = 21, size = inner.box.dotsize, fill = fcsp) } else { if (type == "dot") { baseplot <- ggplot(mydf, aes_string(x = "xpos", y = "frq", colour = "group")) # check whether we have dots plotted, and if so, use annotation # We have to use annotation first, because the diagram's layers are plotted # in the order as they're passed to the ggplot-command. Since we don't want the # shaded rectangles to overlay the dots, we add them first if (!is.null(ganno) && !facet.grid) baseplot <- baseplot + ganno } else { baseplot <- ggplot(mydf, aes_string(x = "xpos", y = "frq", fill = "group")) } # add geom baseplot <- baseplot + geob # define x axis scalex <- scale_x_discrete(labels = axis.labels) } # If we have bars or dot plots, we show # Pearson's chi-square test results baseplot <- print.table.summary(baseplot, modsum, summary.pos) # prepare y-axis and # show or hide y-axis-labels if (show.axis.values) { y_scale <- scale_y_continuous( breaks = gridbreaks, limits = c(lower_lim, upper_lim), expand = expand.grid ) } else { y_scale <- scale_y_continuous( breaks = gridbreaks, limits = c(lower_lim, upper_lim), expand = expand.grid, labels = NULL ) } # continue with plot objects... baseplot <- baseplot + # show absolute and percentage values for each bar ggvaluelabels + # add labels to x- and y-axis, and diagram title labs( title = title, x = axisTitle.x, y = axisTitle.y, fill = legend.title, colour = legend.title ) + # print value labels to the x-axis. # If argument "axis.labels" is NULL, the category numbers (1 to ...) # appear on the x-axis scalex + # set Y-axis, depending on the calculated upper y-range. # It either corresponds to the maximum amount of cases in the data set # (length of var) or to the highest count of var's categories. y_scale # check whether coordinates should be flipped if (coord.flip) baseplot <- baseplot + coord_flip() # Here we start when we have a faces grid instead of # a grouped bar plot. if (facet.grid) { baseplot <- baseplot + # set font size for axes. 
# theme(strip.text = element_text(face = "bold", size = rel(1.1))) + facet_wrap(~group, scales = "free") } # set geom colors baseplot <- sj.setGeomColors(baseplot, geom.colors, length(legend.labels), show.legend, legend.labels) # Plot integrated bar chart here baseplot } sjPlot/R/tab_pca.R0000644000176200001440000007004713662304072013455 0ustar liggesusers#' @title Summary of principal component analysis as HTML table #' @name tab_pca #' #' @description Performs a principal component analysis on a data frame or matrix #' (with varimax or oblimin rotation) and displays the factor solution as HTML #' table, or saves it as a file. \cr \cr In case a data frame is used as #' parameter, the Cronbach's Alpha value for each factor scale will be calculated, #' i.e. all variables with the highest loading for a factor are taken for the #' reliability test. The result is an alpha value for each factor dimension. #' #' @param data A data frame that should be used to compute a PCA, or a \code{\link{prcomp}} object. #' @param rotation Rotation of the factor loadings. May be one of #' \code{"varimax", "quartimax", "promax", "oblimin", "simplimax", "cluster"} #' or \code{"none"}. #' @param nmbr.fctr Number of factors used for calculating the rotation. By #' default, this value is \code{NULL} and the number of factors is #' determined according to the Kaiser criterion. #' @param fctr.load.tlrn Specifies the minimum difference a variable needs to have between #' factor loadings (components) in order to indicate a clear loading on just one factor and not #' diffusing over all factors. For instance, a variable with factor loadings of 0.8, 0.82 and 0.84 #' on 3 possible factors cannot be clearly assigned to just one factor and thus would be removed #' from the principal component analysis. By default, the minimum difference of loading values #' between the highest and 2nd highest factor should be 0.1. #' @param show.cronb Logical, if \code{TRUE} (default), the Cronbach's alpha value for each factor scale will be calculated, #' i.e. all variables with the highest loading for a factor are taken for the #' reliability test. The result is an alpha value for each factor dimension. #' Only applies when \code{data} is a data frame. #' @param show.msa Logical, if \code{TRUE}, shows an additional column with the measure of sampling adequacy #' for each component. #' @param show.var Logical, if \code{TRUE}, the proportions of variances for each component as well as cumulative #' variance are shown in the table footer. #' @param string.pov String for the table row that contains the proportions of variances. By default, #' \emph{"Proportion of Variance"} will be used. #' @param string.cpov String for the table row that contains the cumulative variances. By default, #' \emph{"Cumulative Proportion"} will be used. #' #' @inheritParams tab_model #' @inheritParams view_df #' @inheritParams tab_df #' @inheritParams tab_xtab #' @inheritParams plot_grpfrq #' @inheritParams tab_corr #' #' @return Invisibly returns #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the web page content (\code{page.content}), #' \item the complete html-output (\code{page.complete}), #' \item the html-table with inline-css for use with knitr (\code{knitr}), #' \item the \code{factor.index}, i.e. the column index of each variable with the highest factor loading for each factor and #' \item the \code{removed.items}, i.e. which variables have been removed because they were outside of the \code{fctr.load.tlrn}'s range.
#' } #' for further use. #' #' @examples #' \dontrun{ #' # Data from the EUROFAMCARE sample dataset #' library(sjmisc) #' data(efc) #' #' # recveive first item of COPE-index scale #' start <- which(colnames(efc) == "c82cop1") #' # recveive last item of COPE-index scale #' end <- which(colnames(efc) == "c90cop9") #' # auto-detection of labels #' if (interactive()) { #' tab_pca(efc[, start:end]) #' }} #' @importFrom stats prcomp #' @importFrom performance cronbachs_alpha #' @export tab_pca <- function(data, rotation = c("varimax", "quartimax", "promax", "oblimin", "simplimax", "cluster", "none"), nmbr.fctr = NULL, fctr.load.tlrn = 0.1, title = "Principal Component Analysis", var.labels = NULL, wrap.labels = 40, show.cronb = TRUE, show.msa = FALSE, show.var = FALSE, alternate.rows = FALSE, digits = 2, string.pov = "Proportion of Variance", string.cpov = "Cumulative Proportion", CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE) { # ------------------------------------- # check encoding # ------------------------------------- encoding <- get.encoding(encoding, data) rotation <- match.arg(rotation) # -------------------------------------------------------- # try to automatically set labels is not passed as parameter # -------------------------------------------------------- if (is.null(var.labels) && is.data.frame(data)) { var.labels <- sjlabelled::get_label(data, def.value = colnames(data)) } if (!requireNamespace("psych", quietly = TRUE)) { stop("Package 'psych' required for this function to work. Please install it.", call. = FALSE) } # ---------------------------- # check if user has passed a data frame # or a pca object # ---------------------------- if (inherits(data, "prcomp")) { pcadata <- data dataframeparam <- FALSE show.msa <- FALSE } else { pcadata <- stats::prcomp( stats::na.omit(data), retx = TRUE, center = TRUE, scale. 
= TRUE ) dataframeparam <- TRUE } # ------------------------------------- # init header # ------------------------------------- toWrite <- table.header <- sprintf("\n\n\n", encoding) # ------------------------------------- # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value # ------------------------------------- tag.table <- "table" tag.caption <- "caption" tag.thead <- "thead" tag.tdata <- "tdata" tag.centeralign <- "centeralign" tag.rightalign <- "rightalign" tag.cronbach <- "cronbach" tag.msa <- "msa" tag.pov <- "pov" tag.cpov <- "cpov" tag.rotate <- "rotate" tag.kmo <- "kmo" tag.arc <- "arc" tag.minval <- "minval" tag.removable <- "removable" tag.firsttablerow <- "firsttablerow" tag.firsttablecol <- "firsttablecol" css.table <- "border-collapse:collapse; border:none;" css.caption <- "font-weight: bold; text-align:left;" css.thead <- "border-top:double black; padding:0.2cm;" css.tdata <- "padding:0.2cm;" css.centeralign <- "text-align:center;" css.rightalign <- "text-align:right;" css.cronbach <- "font-style:italic;" css.msa <- "font-style:italic; color:#666666;" css.kmo <- "font-style:italic; border-bottom:double;" css.rotate <- "font-style:italic; font-size:0.9em;" css.pov <- "font-style:italic; border-top:1px solid;" css.cpov <- "font-style:italic;" css.minval <- "color:#cccccc;" css.arc <- "background-color:#eaeaea;" css.removable <- "background-color:#eacccc;" css.firsttablerow <- "border-top:1px solid black;" css.firsttablecol <- "" if (!show.msa && !show.cronb) css.cpov <- sprintf("%s border-bottom:double;", css.cpov) if (!show.msa && show.cronb) css.cronbach <- sprintf("%s border-bottom:double;", css.cronbach) if (!show.var && show.cronb) css.cronbach <- sprintf("%s border-top:1px solid;", css.cronbach) if (!show.var && !show.cronb) css.kmo <- sprintf("%s border-top:1px solid;",css.kmo) if (!show.var && !show.cronb && !show.msa) css.table <- sprintf("%s border-bottom:double;", css.table) # ------------------------ # check user defined style sheets # ------------------------ if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.table']], 2)), CSS[['css.table']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']], 1, 1) == '+', paste0(css.thead, substring(CSS[['css.thead']], 2)), CSS[['css.thead']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']], 1, 1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']], 2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']], 1, 1) == '+', paste0(css.caption, substring(CSS[['css.caption']], 2)), CSS[['css.caption']]) if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']], 1, 1) == '+', paste0(css.centeralign, substring(CSS[['css.centeralign']], 2)), CSS[['css.centeralign']]) if (!is.null(CSS[['css.rightalign']])) css.rightalign <- ifelse(substring(CSS[['css.rightalign']], 1, 1) == '+', paste0(css.rightalign, substring(CSS[['css.rightalign']], 2)), CSS[['css.rightalign']]) if (!is.null(CSS[['css.arc']])) css.arc <- ifelse(substring(CSS[['css.arc']], 1, 1) == '+', paste0(css.arc, substring(CSS[['css.arc']], 2)), CSS[['css.arc']]) if (!is.null(CSS[['css.firsttablerow']])) css.firsttablerow <- ifelse(substring(CSS[['css.firsttablerow']], 1, 1) == '+', paste0(css.firsttablerow, 
substring(CSS[['css.firsttablerow']], 2)), CSS[['css.firsttablerow']]) if (!is.null(CSS[['css.firsttablecol']])) css.firsttablecol <- ifelse(substring(CSS[['css.firsttablecol']], 1, 1) == '+', paste0(css.firsttablecol, substring(CSS[['css.firsttablecol']], 2)), CSS[['css.firsttablecol']]) if (!is.null(CSS[['css.cronbach']])) css.cronbach <- ifelse(substring(CSS[['css.cronbach']], 1, 1) == '+', paste0(css.cronbach, substring(CSS[['css.cronbach']], 2)), CSS[['css.cronbach']]) if (!is.null(CSS[['css.msa']])) css.msa <- ifelse(substring(CSS[['css.msa']], 1, 1) == '+', paste0(css.msa, substring(CSS[['css.msa']], 2)), CSS[['css.msa']]) if (!is.null(CSS[['css.kmo']])) css.kmo <- ifelse(substring(CSS[['css.kmo']], 1, 1) == '+', paste0(css.kmo, substring(CSS[['css.kmo']], 2)), CSS[['css.kmo']]) if (!is.null(CSS[['css.rotate']])) css.rotate <- ifelse(substring(CSS[['css.rotate']], 1, 1) == '+', paste0(css.rotate, substring(CSS[['css.rotate']], 2)), CSS[['css.rotate']]) if (!is.null(CSS[['css.pov']])) css.pov <- ifelse(substring(CSS[['css.pov']], 1, 1) == '+', paste0(css.pov, substring(CSS[['css.pov']], 2)), CSS[['css.pov']]) if (!is.null(CSS[['css.cpov']])) css.cpov <- ifelse(substring(CSS[['css.cpov']], 1, 1) == '+', paste0(css.cpov, substring(CSS[['css.cpov']], 2)), CSS[['css.cpov']]) if (!is.null(CSS[['css.minval']])) css.minval <- ifelse(substring(CSS[['css.minval']], 1, 1) == '+', paste0(css.minval, substring(CSS[['css.minval']], 2)), CSS[['css.minval']]) if (!is.null(CSS[['css.removable']])) css.removable <- ifelse(substring(CSS[['css.removable']], 1, 1) == '+', paste0(css.removable, substring(CSS[['css.removable']], 2)), CSS[['css.removable']]) } # ------------------------ # set page style # ------------------------ page.style <- sprintf("", tag.table, css.table, tag.caption, css.caption, tag.thead, css.thead, tag.tdata, css.tdata, tag.cronbach, css.cronbach, tag.minval, css.minval, tag.removable, css.removable, tag.firsttablerow, css.firsttablerow, tag.firsttablecol, css.firsttablecol, tag.centeralign, css.centeralign, tag.rightalign, css.rightalign, tag.rotate, css.rotate, tag.msa, css.msa, tag.kmo, css.kmo, tag.pov, css.pov, tag.cpov, css.cpov, tag.arc, css.arc) # ------------------------ # start content # ------------------------ toWrite <- paste0(toWrite, page.style) toWrite = paste(toWrite, "\n\n", "\n") # ---------------------------- # calculate eigenvalues # ---------------------------- pcadata.eigenval <- pcadata$sdev^2 # ---------------------------- # retrieve best amount of factors according # to Kaiser-critearia, i.e. factors with eigen value > 1 # ---------------------------- pcadata.kaiser <- which(pcadata.eigenval < 1)[1] - 1 # -------------------------------------------------------- # varimax rotation, retrieve factor loadings # -------------------------------------------------------- # check for predefined number of factors if (!is.null(nmbr.fctr) && is.numeric(nmbr.fctr)) pcadata.kaiser <- nmbr.fctr if (pcadata.kaiser < 2) { stop("Only one principal component extracted. Can't rotate loading matrices. You may use `nmbr.fctr` to extract more than one component.", call. 
= F) } rotation <- match.arg(rotation) # rotate matrix if (rotation == "varimax") pcadata.rotate <- varimaxrota(pcadata, pcadata.kaiser) else pcadata.rotate <- psych::principal(r = data, nfactors = pcadata.kaiser, rotate = rotation) # create data frame with factor loadings df <- as.data.frame(pcadata.rotate$loadings[, seq_len(ncol(pcadata.rotate$loadings))]) # ---------------------------- # check if user defined labels have been supplied # if not, use variable names from data frame # ---------------------------- if (is.null(var.labels)) var.labels <- row.names(df) # ---------------------------- # Prepare length of labels # ---------------------------- if (!is.null(var.labels)) { # wrap long variable labels var.labels <- sjmisc::word_wrap(var.labels, wrap.labels, "
") } # -------------------------------------------------------- # this function checks which items have unclear factor loadings, # i.e. which items do not strongly load on a single factor but # may load almost equally on several factors # -------------------------------------------------------- getRemovableItems <- function(dataframe) { # clear vector removers <- c() # iterate each row of the data frame. each row represents # one item with its factor loadings for (i in seq_len(nrow(dataframe))) { # get factor loadings for each item rowval <- as.numeric(abs(df[i, ])) # retrieve highest loading maxload <- max(rowval) # retrieve 2. highest loading max2load <- sort(rowval, TRUE)[2] # check difference between both if (abs(maxload - max2load) < fctr.load.tlrn) { # if difference is below the tolerance, # remeber row-ID so we can remove that items # for further PCA with updated data frame removers <- c(removers, i) } } # return a vector with index numbers indicating which items # have unclear loadings return(removers) } # -------------------------------------------------------- # this function retrieves a list with the column index ("factor" index) # where each case of the data frame has its highedt factor loading. # So we know to which "group" (factor dimension) each case of the # data frame belongs to according to the pca results # -------------------------------------------------------- getItemLoadings <- function(dataframe) { # return a vector with index numbers indicating which items # loads the highest on which factor return(apply(dataframe, 1, function(x) which.max(abs(x)))) } # -------------------------------------------------------- # this function calculates the cronbach's alpha value for # each factor scale, i.e. all variables with the highest loading # for a factor are taken for the reliability test. The result is # an alpha value for each factor dimension # -------------------------------------------------------- getCronbach <- function(dataframe, itemloadings) { # clear vector cbv <- c() # iterate all highest factor loadings of items for (n in seq_len(length(unique(itemloadings)))) { # calculate cronbach's alpha for those cases that all have the # highest loading on the same factor cbv <- c(cbv, performance::cronbachs_alpha(stats::na.omit(dataframe[, which(itemloadings == n)]))) } # cbv now contains the factor numbers and the related alpha values # for each "factor dimension scale" return(cbv) } # ---------------------------------- # Cronbach's Alpha can only be calculated when having a data frame # with each component / variable as column # ---------------------------------- if (dataframeparam) { # get alpha values alphaValues <- getCronbach(data, getItemLoadings(df)) } else { message("Cronbach's Alpha can only be calculated when having a data frame with each component / variable as column.") alphaValues <- NULL show.cronb <- FALSE } # ------------------------------------- # retrieve those items that have unclear factor loadings, i.e. # which almost load equally on several factors. 
The tolerance # that indicates which difference between factor loadings is # considered as "equally" is defined via fctr.load.tlrn # ------------------------------------- removableItems <- getRemovableItems(df) # ------------------------------------- # retrieve kmo and msa for data set # ------------------------------------- kmo <- NULL if (show.msa) kmo <- psych::KMO(data) # ------------------------------------- # variance # ------------------------------------- pov <- cpov <- NULL if (show.var) { pov <- summary(pcadata)$importance[2, seq_len(pcadata.kaiser)] cpov <- summary(pcadata)$importance[3, seq_len(pcadata.kaiser)] } # ------------------------------------- # convert data frame, add label names # ------------------------------------- maxdf <- apply(df, 1, function(x) max(abs(x))) # ------------------------------------- # start table tag # ------------------------------------- page.content <- "\n" # ------------------------------------- # table caption, variable label # ------------------------------------- if (!is.null(title)) page.content <- paste0(page.content, sprintf(" \n", title)) # ------------------------------------- # header row # ------------------------------------- # write tr-tag page.content <- paste0(page.content, " \n") # first column page.content <- paste0(page.content, " \n") # iterate columns for (i in seq_len(ncol(df))) { page.content <- paste0(page.content, sprintf(" \n", i)) } # check if msa column should be shown if (show.msa) page.content <- paste0(page.content, " \n") # close table row page.content <- paste0(page.content, " \n") # ------------------------------------- # data rows # ------------------------------------- # iterate all rows of df for (i in seq_len(nrow(df))) { # start table row rowcss <- "" # check for removable items in first row if (i %in% removableItems && i == 1) rowcss <- " firsttablerow removable" # check for removable items in other rows if (i %in% removableItems && i != 1) rowcss <- " removable" # check for non-removable items in first row if (is.na(match(i, removableItems)) && i == 1) rowcss <- " firsttablerow" # default row string for alternative row colors arcstring <- "" # if we have alternating row colors, set css if (alternate.rows) arcstring <- ifelse(sjmisc::is_even(i), " arc", "") # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") # print first table cell page.content <- paste0(page.content, sprintf(" \n", arcstring, rowcss, var.labels[i])) # iterate all columns for (j in seq_len(ncol(df))) { # start table column colcss <- sprintf(" class=\"tdata centeralign%s%s\"", arcstring, rowcss) if (maxdf[[i]] != max(abs(df[i, j]))) colcss <- sprintf(" class=\"tdata centeralign minval%s%s\"", arcstring, rowcss) page.content <- paste0(page.content, sprintf(" %.*f\n", colcss, digits, df[i, j])) } # check if msa column should be shown if (show.msa) page.content <- paste0(page.content, sprintf(" \n", arcstring, rowcss, digits, kmo$MSAi[[i]])) # close row page.content <- paste0(page.content, " \n") } # ------------------------------------- # variance # ------------------------------------- if (show.var) { # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") # first column page.content <- paste0(page.content, sprintf(" \n", string.pov)) # iterate alpha-values for (i in 1:length(pov)) { page.content <- paste0(page.content, sprintf(" \n", digits, 100 * pov[i])) } # check if msa column should be shown if (show.msa) page.content <- paste0(page.content, " \n") page.content <- 
paste0(page.content, " \n \n") # first column page.content <- paste0(page.content, sprintf(" \n", string.cpov)) # iterate alpha-values for (i in 1:length(pov)) { page.content <- paste0(page.content, sprintf(" \n", digits, 100 * cpov[i])) } # check if msa column should be shown if (show.msa) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, " \n") } # ------------------------------------- # cronbach's alpha # ------------------------------------- if (show.cronb && !is.null(alphaValues)) { # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") # first column page.content <- paste0(page.content, " \n") # iterate alpha-values for (i in seq_len(length(alphaValues))) { page.content <- paste0(page.content, sprintf(" \n", digits, alphaValues[i])) } # check if msa column should be shown if (show.msa) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, " \n") } # ------------------------------------- # Kaiser-Meyer-Olkin-Kriterium # ------------------------------------- if (show.msa) { # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" \n", ncol(df))) page.content <- paste0(page.content, sprintf(" \n", digits, kmo$MSA)) page.content <- paste0(page.content, " \n") } # ------------------------------------- # show rotation # ------------------------------------- colsp <- ncol(df) + 1 if (show.msa) colsp <- colsp + 1 page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" \n", colsp, rotation)) page.content <- paste0(page.content, " \n") # ------------------------------------- # finish table # ------------------------------------- page.content <- paste(page.content, "\n
%s
 Component %iMSA
%s%.*f
%s%.*f %%
%s%.*f %%
Cronbach's α%.*f
Kaiser-Meyer-Olkin%.*f
%s-rotation
") # ------------------------------------- # finish html page # ------------------------------------- toWrite <- paste(toWrite, page.content, "\n") toWrite <- paste0(toWrite, "") # ------------------------------------- # create list with factor loadings that indicate # on which column inside the data frame the highest # loading is # ------------------------------------- factorindex <- apply(df, 1, function(x) which.max(abs(x))) # ------------------------------------- # replace class attributes with inline style, # useful for knitr # ------------------------------------- # copy page content # ------------------------------------- knitr <- page.content # ------------------------------------- # set style attributes for main table tags # ------------------------------------- knitr <- gsub("class=", "style=", knitr, fixed = TRUE, useBytes = TRUE) knitr <- gsub("% as.data.frame() %>% purrr::map_df(~ .x[length(.x):1]) %>% tidyr::gather() %>% dplyr::arrange(.data$key) ) x$y <- rep_len(1:longest.pal, nrow(x)) x$cols = as.factor(1:nrow(x)) x$key <- factor(x$key, levels = rev(unique(x$key))) x$group <- "Other Palettes" x$group[.is_cont_scale(x$key)] <- "Continuous Palettes" x$group[x$key %in% c("breakfast.club", "flat", "metro", "quadro", "set1", "simply", "social")] <- "Red-Blue-Green Palettes" ggplot(x, aes_string(x = "key", fill = "cols")) + geom_bar(width = .7) + scale_fill_manual(values = x$value) + scale_y_continuous(breaks = NULL, labels = NULL) + guides(fill = "none") + coord_flip() + theme_minimal() + labs(x = NULL, y = NULL) + facet_wrap(~group, ncol = 1, scales = "free") } #' @importFrom grDevices colorRampPalette get_sjplot_pal <- function(palette = "metro", reverse = FALSE, ...) { pal <- sjplot_colors[[palette]] if (reverse) pal <- rev(pal) grDevices::colorRampPalette(pal, ...) } #' @rdname sjPlot-themes #' @export css_theme <- function(css.theme = "regression") { if (!(css.theme %in% names(css.themes))) { warning(sprintf("No valid CSS-theme name. Current available themes are: %s", paste(names(css.themes), collapse = ", ")), call. = FALSE) return(NULL) } css.themes[[css.theme]] } css.themes <- list( `regression` = list( css.thead = "border-top: double; text-align:center; font-style:normal; font-weight:bold; padding:0.2cm;", css.firsttablerow = "", css.summarydata = "text-align:left;" ), `cells` = list( css.td = "border:1px solid black;", css.thead = "border:1px solid black;" ), `right_aligned` = list( css.tdata = "padding:0.2cm; text-align:right; vertical-align:middle;" ) ) sjPlot/R/S3-methods.R0000644000176200001440000003467113521065407014015 0ustar liggesusers#' @importFrom utils browseURL #' @export print.sjTable <- function(x, ...) { # check if we have filename specified if (!is.null(x$file)) { # write file write(x$knitr, file = x$file) } else { x$page.complete <- replace_umlauts(x$page.complete) # else open in viewer pane htmlFile <- tempfile(fileext = ".html") write(x$page.complete, file = htmlFile) # check whether we have RStudio Viewer viewer <- getOption("viewer") if (x$viewer && !is.null(viewer)) { viewer(htmlFile) } else { utils::browseURL(htmlFile) } } } #' @importFrom knitr knit_print asis_output #' @export knit_print.sjTable <- function(input, ...) 
{ x <- input$knitr x <- replace_umlauts(x) knitr::asis_output(x) # knitr::asis_output(input$knitr) } replace_umlauts <- function(x) { x <- gsub("\u00E4", "ä", x, fixed = TRUE, useBytes = FALSE) x <- gsub("\u00F6", "ö", x, fixed = TRUE, useBytes = FALSE) x <- gsub("\u00FC", "ü", x, fixed = TRUE, useBytes = FALSE) x <- gsub("\u00C4", "Ä", x, fixed = TRUE, useBytes = FALSE) x <- gsub("\u00D6", "Ö", x, fixed = TRUE, useBytes = FALSE) x <- gsub("\u00DC", "Ü", x, fixed = TRUE, useBytes = FALSE) x <- gsub("\u00DF", "ß", x, fixed = TRUE, useBytes = FALSE) # x <- gsub("ä", "ä", x, fixed = TRUE, useBytes = TRUE) # x <- gsub("ö", "ö", x, fixed = TRUE, useBytes = TRUE) # x <- gsub("ü", "ü", x, fixed = TRUE, useBytes = TRUE) # x <- gsub("Ä", "Ä", x, fixed = TRUE, useBytes = TRUE) # x <- gsub("Ö", "Ö", x, fixed = TRUE, useBytes = TRUE) # x <- gsub("Ü", "Ü", x, fixed = TRUE, useBytes = TRUE) # x <- gsub("ß", "ß", x, fixed = TRUE, useBytes = TRUE) x } # knitr method for grpmean() ---- #' @export knit_print.sjt_grpmean <- function(input, ...) { knitr::asis_output(pgrpmean(input, ...)$knitr) } #' @export knit_print.sjt_grpmeans <- function(input, ...) { knitr::asis_output(pgrpmeans(input, ...)$knitr) } # knitr method method for reliab_test() ---- #' @export knit_print.sjt_reliab <- function(input, ...) { knitr::asis_output(preliab(input, ...)$knitr) } # knitr method method for descr() ---- #' @export knit_print.sjt_descr <- function(input, ...) { knitr::asis_output(pdescr(input, ...)$knitr) } #' @export knit_print.sjt_grpdescr <- function(input, ...) { knitr::asis_output(pgdescr(input, ...)$knitr) } # knitr method method for equi_test() ---- #' @export knit_print.sjt_descr <- function(input, ...) { knitr::asis_output(pequi_test(input, ...)$knitr) } # knitr method for frq() ---- #' @export knit_print.sjt_frq <- function(input, ...) { knitr::asis_output(pfrq(input, ...)$knitr) } # knitr method for mwu() ---- #' @export knit_print.sjt_mwu <- function(input, ...) { knitr::asis_output(pmwu(input, ...)$knitr) } # HTMl table method for grpmean() ---- #' @export print.sjt_grpmean <- function(x, ...) { print(pgrpmean(x, ...), ...) } #' @export print.sjt_grpmeans <- function(x, ...) { print(pgrpmeans(x, ...), ...) } # HTMl table method for reliab_test() ---- #' @export print.sjt_reliab <- function(x, ...) { print(preliab(x, ...), ...) } # HTMl table method for equi_test() ---- #' @export print.sjt_equi_test <- function(x, ...) { print(pequi_test(x, ...), ...) } # HTMl table method for descr() ---- #' @export print.sjt_descr <- function(x, ...) { print(pdescr(x, ...), ...) } #' @export print.sjt_grpdescr <- function(x, ...) { print(pgdescr(x, ...), ...) } # HTMl table method for frq() ---- #' @export print.sjt_frq <- function(x, ...) { print(pfrq(x, ...), ...) } # HTMl table method for mwu() ---- #' @export print.sjt_mwu <- function(x, ...) { print(pmwu(x, ...), ...) } pgrpmean <- function(x, ...) 
{ title <- sprintf( "Mean for %s by %s", attr(x, "dv.label", exact = TRUE), attr(x, "grp.label", exact = TRUE) ) footnote <- sprintf( "Anova: R2=%.3f; adj.R2=%.3f; F=%.3f; p=%.3f", attr(x, "r2", exact = TRUE), attr(x, "adj.r2", exact = TRUE), attr(x, "fstat", exact = TRUE), attr(x, "p.value", exact = TRUE) ) enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" tab_df( x = x, title = title, footnote = footnote, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = TRUE, alternate.rows = FALSE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-top:1px solid; border-bottom: double;' ), file = file, use.viewer = attr(x, "print", exact = TRUE) == "viewer", ... ) } #' @importFrom purrr map_chr pgrpmeans <- function(x, ...) { uv <- attr(x, "print", exact = TRUE) == "viewer" enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" titles <- purrr::map_chr(x, ~ sprintf( "Mean for %s by %s
grouped by %s", attr(.x, "dv.label", exact = TRUE), attr(.x, "grp.label", exact = TRUE), gsub(pattern = "\n", replacement = "
", attr(.x, "group", exact = TRUE), fixed = T) )) footnotes <- purrr::map_chr(x, ~ sprintf( "Anova: R2=%.3f; adj.R2=%.3f; F=%.3f; p=%.3f", attr(.x, "r2", exact = TRUE), attr(.x, "adj.r2", exact = TRUE), attr(.x, "fstat", exact = TRUE), attr(.x, "p.value", exact = TRUE) )) tab_dfs( x = x, titles = titles, footnotes = footnotes, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = TRUE, alternate.rows = FALSE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-top:1px solid; border-bottom: double;' ), file = file, use.viewer = uv, ... ) } pequi_test <- function(x, ...) { chead <- c( "Term", "H0", "% in ROPE", "HDI (95%)" ) x$inside.rope <- sprintf("%.1f%%", x$inside.rope) x$hdi <- sprintf("%.2f – %.2f", x$hdi.low, x$hdi.high) x <- dplyr::select(x, c(1:3, 6)) footnote <- sprintf( "Effect Size: %.2f · ROPE: %.2f – %.2f · Samples: %i", attr(x, "eff_size", exact = TRUE), attr(x, "rope", exact = TRUE)[1], attr(x, "rope", exact = TRUE)[2], attr(x, "nsamples", exact = TRUE) ) if (isTRUE(attr(x, "critical"))) { footnote <- paste0( footnote, "
(*) number of effective samples may be insufficient for this parameter" ) } enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" tab_df( x = x, title = "Test for Practical Equivalence of Model Parameters", footnote = footnote, col.header = chead, show.type = FALSE, show.rownames = FALSE, show.footnote = TRUE, alternate.rows = FALSE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-bottom: 1px solid;', css.col3 = '+text-align:right;' ), file = file, use.viewer = attr(x, "print", exact = TRUE) == "viewer", ... ) } preliab <- function(x, ...) { chead <- c( "Variable", "α if deleted", "Item Discrimination" ) enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" tab_df( x = x, title = "Reliability Test", footnote = NULL, col.header = chead, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = FALSE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-bottom: 1px solid;' ), file = file, use.viewer = attr(x, "print", exact = TRUE) == "viewer", ... ) } #' @importFrom purrr map_if #' @importFrom sjmisc is_float pdescr <- function(x, ...) { digits <- 2 # do we have digits argument? add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("digits" %in% names(add.args)) digits <- eval(add.args[["digits"]]) uv <- attr(x, "print", exact = TRUE) == "viewer" enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" chead <- c( "Variable", "Type", "Label", "N", "Missings (%)", "Mean", "SD", "SE", "Median", "Trimmed Mean", "Range", "Skewness" ) present_columns <- c("var", "type", "label", "n", "NA.prc", "mean", "sd", "se", "md", "trimmed", "range", "skew") chead <- chead[which(present_columns %in% colnames(x))] x <- x %>% purrr::map_if(sjmisc::is_float, ~ round(.x, digits)) %>% as.data.frame() tab_df( x = x, title = "Basic descriptive statistics", footnote = NULL, col.header = chead, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = TRUE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-bottom: 1px solid;', css.centeralign = 'text-align:right;', css.col2 = '+text-align:left;', css.col3 = '+text-align:left;' ), file = file, use.viewer = uv, ... ) } #' @importFrom purrr map_if map_chr map #' @importFrom sjmisc is_float pgdescr <- function(x, ...) { titles <- purrr::map_chr(x, ~ sprintf( "Basic descriptives
grouped by %s", gsub(pattern = "\n", replacement = "
", attr(.x, "group", exact = TRUE), fixed = T) )) digits <- 2 # do we have digits argument? add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("digits" %in% names(add.args)) digits <- eval(add.args[["digits"]]) uv <- attr(x, "print", exact = TRUE) == "viewer" enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" chead <- c( "Variable", "Type", "Label", "N", "Missings (%)", "Mean", "SD", "SE", "Median", "Trimmed Mean", "Range", "Skewness" ) x <- x %>% purrr::map(~ purrr::map_if( .x, sjmisc::is_float, ~ round(.x, digits) ) %>% as.data.frame()) tab_dfs( x = x, titles = titles, footnotes = NULL, col.header = chead, show.type = FALSE, show.rownames = FALSE, show.footnote = FALSE, alternate.rows = TRUE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-bottom: 1px solid;', css.col3 = '+text-align:left;' ), file = file, use.viewer = uv, ... ) } #' @importFrom purrr map_if map_chr map #' @importFrom dplyr n_distinct select #' @importFrom sjmisc is_empty pfrq <- function(x, ...) { uv <- attr(x, "print", exact = TRUE) == "viewer" enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" titles <- purrr::map_chr(x, function(i) { ret <- "" # get variable label lab <- attr(i, "label", exact = T) vt <- attr(i, "vartype", exact = T) # fix variable type string if (!sjmisc::is_empty(vt)) vt <- sprintf(" <%s>", vt) else vt <- "" if (!is.null(lab)) ret <- sprintf("%s%s", lab, vt) # get grouping title label grp <- attr(i, "group", exact = T) if (!is.null(grp)) ret <- sprintf("%s
<br>grouped by:<br>%s", ret, grp) gsub(pattern = "\n", replacement = "<br>
", x = ret, fixed = T) }) footnotes <- purrr::map_chr(x, ~ sprintf( "total N=%i · valid N=%i · x̄=%.2f · σ=%.2f\n", sum(.x$frq, na.rm = TRUE), sum(.x$frq[1:(nrow(.x) - 1)], na.rm = TRUE), attr(.x, "mean", exact = T), attr(.x, "sd", exact = T) ) ) x <- purrr::map(x, function(i) { if (dplyr::n_distinct(i$label) == 1 && unique(i$label) == "") i <- dplyr::select(i, -.data$label) i }) tab_dfs( x = x, titles = titles, footnotes = footnotes, col.header = NULL, show.type = FALSE, show.rownames = FALSE, show.footnote = TRUE, alternate.rows = FALSE, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-bottom: 1px solid;', css.col2 = 'text-align: left;', css.col3 = 'text-align: right;', css.col4 = 'text-align: right;', css.col5 = 'text-align: right;', css.col6 = 'text-align: right;' ), file = file, use.viewer = uv, ... ) } #' @importFrom stats na.omit kruskal.test pmwu <- function(x, ...) { fn <- NULL chead <- c( "Groups", "N", "Mean Rank", "Mann-Whitney U", "Wilcoxon W", "Z", "Effect Size", "p-value" ) # if we have more than 2 groups, also perfom kruskal-wallis-test if (length(unique(stats::na.omit(x$data$grp))) > 2) { kw <- stats::kruskal.test(x$data$x, x$data$grp) if (kw$p.value < 0.001) { p <- 0.001 p.string <- "<" } else { p <- kw$p.value p.string <- "=" } fn <- sprintf( "Kruskal-Wallis-Test: χ2=%.3f · df=%i · p%s%.3f", kw$statistic, kw$parameter, p.string, p ) } enc <- attr(x, "encoding", exact = TRUE) file <- attr(x, "file", exact = TRUE) if (is.null(enc)) enc <- "UTF-8" tab_df( x = x$tab.df, title = "Mann-Whitney U-Test", footnote = fn, col.header = chead, show.rownames = FALSE, show.type = FALSE, show.footnote = !is.null(fn), alternate.rows = TRUE, file = file, encoding = enc, CSS = list( css.firsttablecol = '+text-align:left;', css.lasttablerow = 'border-bottom: 1px solid;' ), use.viewer = attr(x, "print", exact = TRUE) == "viewer", ... ) } sjPlot/R/plot_type_eff.R0000644000176200001440000000724613544103540014720 0ustar liggesusers#' @importFrom ggeffects ggpredict ggeffect plot_type_eff <- function(type, model, terms, ci.lvl, pred.type, facets, show.data, jitter, geom.colors, axis.title, title, legend.title, axis.lim, case, show.legend, dot.size, line.size, ...) { if (missing(facets) || is.null(facets)) facets <- FALSE if (type == "pred") { dat <- ggeffects::ggpredict( model = model, terms = terms, ci.lvl = ci.lvl, type = pred.type, ... ) } else if (type == "emm") { dat <- ggeffects::ggemmeans( model = model, terms = terms, ci.lvl = ci.lvl, type = pred.type, ... ) } else { dat <- ggeffects::ggeffect( model = model, terms = terms, ci.lvl = ci.lvl, ... 
) } if (is.null(dat)) return(NULL) # evaluate dots-arguments alpha <- .15 dodge <- .1 dot.alpha <- .5 log.y <- FALSE # save number of terms, needed later n.terms <- length(insight::find_predictors(model, component = "conditional", flatten = TRUE)) add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("alpha" %in% names(add.args)) alpha <- eval(add.args[["alpha"]]) if ("dodge" %in% names(add.args)) dodge <- eval(add.args[["dodge"]]) if ("dot.alpha" %in% names(add.args)) dot.alpha <- eval(add.args[["dot.alpha"]]) if ("log.y" %in% names(add.args)) log.y <- eval(add.args[["log.y"]]) # select color palette if (geom.colors[1] != "bw") { if (is.null(terms)) { if (facets) { geom.colors <- "bw" .ngrp <- n.terms } else { .ngrp <- 1 } } else { .ngrp <- dplyr::n_distinct(dat$group) } geom.colors <- col_check2(geom.colors, .ngrp) } p <- graphics::plot( dat, ci = !is.na(ci.lvl), facets = facets, rawdata = show.data, colors = geom.colors, use.theme = FALSE, jitter = jitter, case = case, show.legend = show.legend, dot.alpha = dot.alpha, alpha = alpha, dodge = dodge, log.y = log.y, dot.size = dot.size, line.size = line.size ) # set axis and plot titles if (!is.null(axis.title) && !is.null(terms)) { if (length(axis.title) > 1) { p <- p + labs(x = axis.title[1], y = axis.title[2]) } else { p <- p + labs(y = axis.title) } } else if (!is.null(axis.title) && is.null(terms)) { if (length(axis.title) > 1) { p <- purrr::map(p, ~ .x + labs(x = axis.title[1], y = axis.title[2])) } else { p <- purrr::map(p, ~ .x + labs(y = axis.title)) } } # set axis and plot titles if (!is.null(title) && !is.null(terms)) p <- p + ggtitle(title) else if (!is.null(title) && is.null(terms)) p <- purrr::map(p, ~ .x + ggtitle(title)) # set axis and plot titles if (!is.null(legend.title)) { if (geom.colors[1] == "bw") { p <- p + labs(linetype = legend.title) + guides(colour = "none") } else { p <- p + labs(colour = legend.title) } } # set axis limits if (!is.null(axis.lim)) { if (is.list(axis.lim)) p <- p + xlim(axis.lim[[1]]) + ylim(axis.lim[[2]]) else p <- p + ylim(axis.lim) } p } sjPlot/R/tab_model.R0000644000176200001440000015037314140543611014007 0ustar liggesusers#' @title Print regression models as HTML table #' @name tab_model #' #' @description #' \code{tab_model()} creates HTML tables from regression models. #' #' @param title String, will be used as table caption. #' @param terms Character vector with names of those terms (variables) that should #' be printed in the table. All other terms are removed from the output. If #' \code{NULL}, all terms are printed. Note that the term names must match #' the names of the model's coefficients. For factors, this means that #' the variable name is suffixed with the related factor level, and each #' category counts as one term. E.g. \code{rm.terms = "t_name [2,3]"} #' would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming #' that the variable \code{t_name} is categorical and has at least #' the factor levels \code{2} and \code{3}). Another example for the #' \emph{iris}-dataset: \code{terms = "Species"} would not work, instead #' use \code{terms = "Species [versicolor,virginica]"}. #' @param rm.terms Character vector with names that indicate which terms should #' be removed from the output Counterpart to \code{terms}. \code{rm.terms = #' "t_name"} would remove the term \emph{t_name}. Default is \code{NULL}, i.e. #' all terms are used. 
For factors, levels that should be removed from the plot #' need to be explicitly indicated in square brackets, and match the model's #' coefficient names, e.g. \code{rm.terms = "t_name [2,3]"} would remove the terms #' \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} #' was categorical and has at least the factor levels \code{2} and \code{3}). #' @param keep,drop Character containing a regular expression pattern that #' describes the parameters that should be included (for \code{keep}) or excluded #' (for \code{drop}) in the returned data frame. \code{keep} may also be a #' named list of regular expressions. All non-matching parameters will be #' removed from the output. If \code{keep} has more than one element, these #' will be merged with an \code{OR} operator into a regular expression pattern #' like this: \code{"(one|two|three)"}. See further details in #' \code{?parameters::model_parameters}. #' @param pred.labels Character vector with labels of predictor variables. #' If not \code{NULL}, \code{pred.labels} will be used in the first #' table column with the predictors' names. By default, if \code{auto.label = TRUE} #' and \href{https://strengejacke.github.io/sjlabelled/articles/intro_sjlabelled.html}{data is labelled}, #' \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels #' of the coefficients, which will be used as predictor labels. If data is #' not labelled, \href{https://easystats.github.io/parameters/reference/format_parameters.html}{format_parameters()} #' is used to create pretty labels. If \code{pred.labels = ""} or \code{auto.label = FALSE}, the raw #' variable names as used in the model formula are used as predictor #' labels. If \code{pred.labels} is a named vector, predictor labels (by #' default, the names of the model's coefficients) will be matched with the #' names of \code{pred.labels}. This ensures that labels always match the #' related predictor in the table, no matter in which way the predictors #' are sorted. See 'Examples'. #' @param dv.labels Character vector with labels of dependent variables of all #' fitted models. If \code{dv.labels = ""}, the row with names of dependent #' variables is omitted from the table. #' @param show.intercept Logical, if \code{TRUE}, the intercepts are printed. #' @param show.est Logical, if \code{TRUE}, the estimates are printed. #' @param show.zeroinf Logical, if \code{TRUE} and model has a zero-inflated #' model part, this is also printed to the table. #' @param show.re.var Logical, if \code{TRUE}, prints the random effect variances #' for mixed models. See \code{\link[insight]{get_variance}} for details. #' @param show.icc Logical, if \code{TRUE}, prints the intraclass correlation #' coefficient for mixed models. See \code{\link[performance]{icc}} for details. #' @param show.ngroups Logical, if \code{TRUE}, shows number of random effects groups #' for mixed models. #' @param show.dev Logical, if \code{TRUE}, shows the deviance of the model. #' @param show.loglik Logical, if \code{TRUE}, shows the log-Likelihood of the model. #' @param show.ci Either logical, and if \code{TRUE}, the confidence intervals #' is printed to the table; if \code{FALSE}, confidence intervals are #' omitted. Or numeric, between 0 and 1, indicating the range of the #' confidence intervals. #' @param show.std Indicates whether standardized beta-coefficients should #' also printed, and if yes, which type of standardization is done. #' See 'Details'. 
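#'   As a hedged illustration (\code{m} is a placeholder for any fitted model,
#'   e.g. from \code{lm()}), standardized estimates can be requested with:
#'   \preformatted{
#'   tab_model(m, show.std = "std")   # refit on standardized data
#'   tab_model(m, show.std = "std2")  # Gelman's two-SD standardization
#'   }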
#' @param show.p Logical, if \code{TRUE}, p-values are also printed. #' @param show.se Logical, if \code{TRUE}, the standard errors are #' also printed. If robust standard errors are required, use arguments #' \code{vcov.fun}, \code{vcov.type} and \code{vcov.args} (see #' \code{\link[parameters]{standard_error_robust}} and #' \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} #' for details). #' @param show.r2 Logical, if \code{TRUE}, the r-squared value is also printed. #' Depending on the model, these might be pseudo-r-squared values, or Bayesian #' r-squared etc. See \code{\link[performance]{r2}} for details. #' @param show.stat Logical, if \code{TRUE}, the coefficients' test statistic #' is also printed. #' @param show.df Logical, if \code{TRUE} and \code{p.val = "kr"}, the p-values #' for linear mixed models are based on df with Kenward-Roger approximation. #' These df-values are printed. See \code{\link[parameters]{p_value}} for details. #' @param string.pred Character vector, used as headline for the predictor column. #' Default is \code{"Predictors"}. #' @param string.est Character vector, used for the column heading of coefficients. #' Default is based on the response scale, e.g. for logistic regression models, #' \code{"Odds Ratios"} will be chosen, while for Poisson models it is #' \code{"Incidence Rate Ratios"} etc. Default if not specified is \code{"Estimate"}. #' @param string.std Character vector, used for the column heading of standardized beta coefficients. Default is \code{"std. Beta"}. #' @param string.ci Character vector, used for the column heading of confidence interval values. Default is \code{"CI"}. #' @param string.se Character vector, used for the column heading of standard error values. Default is \code{"std. Error"}. #' @param string.std_se Character vector, used for the column heading of standard errors of standardized coefficients. Default is \code{"standardized std. Error"}. #' @param string.std_ci Character vector, used for the column heading of confidence intervals of standardized coefficients. Default is \code{"standardized CI"}. #' @param string.p Character vector, used for the column heading of p values. Default is \code{"p"}. #' @param string.std.p Character vector, used for the column heading of p values of standardized coefficients. Default is \code{"std. p"}. #' @param string.df Character vector, used for the column heading of degrees of freedom. Default is \code{"df"}. #' @param string.stat Character vector, used for the test statistic. Default is \code{"Statistic"}. #' @param string.std.stat Character vector, used for the test statistic of standardized coefficients. Default is \code{"std. Statistic"}. #' @param string.resp Character vector, used for the column heading of the response level for multinomial or categorical models. Default is \code{"Response"}. #' @param string.intercept Character vector, used as name for the intercept parameter. Default is \code{"(Intercept)"}. #' @param strings Named character vector, as alternative to arguments like \code{string.ci} #' or \code{string.p} etc. The name (lhs) must be one of the string-indicators from #' the aforementioned arguments, while the value (rhs) is the string that is used #' as column heading. E.g., \code{strings = c(ci = "Conf.Int.", se = "std. Err")} #' would be equivalent to setting \code{string.ci = "Conf.Int.", string.se = "std. Err"}. #' @param ci.hyphen Character vector, indicating the hyphen for confidence interval range. #' May be an HTML entity. See 'Examples'.
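#'   For instance (a hedged sketch; \code{m} again stands for any fitted model
#'   object), an HTML entity can be passed directly as the hyphen between the
#'   CI bounds:
#'   \preformatted{
#'   tab_model(m, ci.hyphen = " &ndash; ")
#'   }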
#' @param minus.sign string, indicating the minus sign for negative numbers. #' May be an HTML entity. See 'Examples'. #' @param emph.p Logical, if \code{TRUE}, significant p-values are shown bold faced. #' @param digits Amount of decimals for estimates #' @param digits.p Amount of decimals for p-values #' @param digits.rsq Amount of decimals for r-squared values #' @param digits.re Amount of decimals for random effects part of the summary table. #' @param collapse.ci Logical, if \code{FALSE}, the CI values are shown in #' a separate table column. #' @param collapse.se Logical, if \code{FALSE}, the SE values are shown in #' a separate table column. #' @param linebreak Logical, if \code{TRUE} and \code{collapse.ci = FALSE} or #' \code{collapse.se = FALSE}, inserts a line break between estimate and #' CI resp. SE values. If \code{FALSE}, values are printed in the same line #' as estimate values. #' @param show.reflvl Logical, if \code{TRUE}, an additional row is inserted to #' the table before each predictor of type \code{\link{factor}}, which will #' indicate the reference level of the related factor. #' @param show.ci50 Logical, if \code{TRUE}, for Bayesian models, a second #' credible interval is added to the table output. #' @param bootstrap Logical, if \code{TRUE}, returns bootstrapped estimates.. #' @param iterations Numeric, number of bootstrap iterations (default is 1000). #' @param seed Numeric, the number of the seed to replicate bootstrapped estimates. If \code{NULL}, uses random seed. #' @param show.fstat Logical, if \code{TRUE}, the F-statistics for each model is #' printed in the table summary. This option is not supported by all model types. #' @param show.aic Logical, if \code{TRUE}, the AIC value for each model is printed #' in the table summary. #' @param show.aicc Logical, if \code{TRUE}, the second-order AIC value for each model #' is printed in the table summary. #' @param show.obs Logical, if \code{TRUE}, the number of observations per model is #' printed in the table summary. #' @param col.order Character vector, indicating which columns should be printed #' and in which order. Column names that are excluded from \code{col.order} #' are not shown in the table output. However, column names that are included, #' are only shown in the table when the related argument (like \code{show.est} #' for \code{"estimate"}) is set to \code{TRUE} or another valid value. #' Table columns are printed in the order as they appear in \code{col.order}. #' @param df.method,p.val Method for computing degrees of freedom for p-values, #' standard errors and confidence intervals (CI). Only applies to mixed models. #' Use \code{df.method = "wald"} for a faster, but less precise computation. #' This will use the residual degrees of freedom (as returned by \code{df.residual()}) #' for linear mixed models, and \code{Inf} degrees if freedom for all other #' model families. \code{df.method = "kenward"} (or \code{df.method = "kr"}) #' uses Kenward-Roger approximation for the degrees of freedom. #' \code{df.method = "satterthwaite"} uses Satterthwaite's approximation and #' \code{"ml1"} uses a "m-l-1" heuristic see \code{\link[parameters]{degrees_of_freedom}} #' for details). Use \code{show.df = TRUE} to show the approximated degrees of freedom #' for each coefficient. #' @param p.style Character, indicating if p-values should be printed as #' numeric value (\code{"numeric"}), as 'stars' (asterisks) only (\code{"stars"}), #' or scientific (\code{"scientific"}). 
Scientific and numeric style can be #' combined with "stars", e.g. \code{"numeric_stars"} #' @param CSS A \code{\link{list}} with user-defined style-sheet-definitions, #' according to the \href{http://www.w3.org/Style/CSS/}{official CSS syntax}. #' See 'Details' or \href{https://strengejacke.github.io/sjPlot/articles/table_css.html}{this package-vignette}. #' @param file Destination file, if the output should be saved as file. #' If \code{NULL} (default), the output will be saved as temporary file and #' opened either in the IDE's viewer pane or the default web browser. #' @param use.viewer Logical, if \code{TRUE}, the HTML table is shown in the IDE's #' viewer pane. If \code{FALSE} or no viewer available, the HTML table is #' opened in a web browser. #' #' @inheritParams plot_models #' @inheritParams plot_model #' @inheritParams tab_df #' #' @return Invisibly returns #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the web page content (\code{page.content}), #' \item the complete html-output (\code{page.complete}) and #' \item the html-table with inline-css for use with knitr (\code{knitr}) #' } #' for further use. #' #' @note The HTML tables can either be saved as file and manually opened (use argument \code{file}) or #' they can be saved as temporary files and will be displayed in the RStudio Viewer pane (if working with RStudio) #' or opened with the default web browser. Displaying resp. opening a temporary file is the #' default behaviour (i.e. \code{file = NULL}). #' \cr \cr #' Examples are shown in these three vignettes: #' \href{https://strengejacke.github.io/sjPlot/articles/tab_model_estimates.html}{Summary of Regression Models as HTML Table}, #' \href{https://strengejacke.github.io/sjPlot/articles/tab_mixed.html}{Summary of Mixed Models as HTML Table} and #' \href{https://strengejacke.github.io/sjPlot/articles/tab_bayes.html}{Summary of Bayesian Models as HTML Table}. #' #' @details #' \subsection{Standardized Estimates}{ #' Default standardization is done by completely refitting the model on the #' standardized data. Hence, this approach is equal to standardizing the #' variables before fitting the model, which is particularly recommended for #' complex models that include interactions or transformations (e.g., polynomial #' or spline terms). When \code{show.std = "std2"}, standardization of estimates #' follows \href{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf}{Gelman's (2008)} #' suggestion, rescaling the estimates by dividing them by two standard deviations #' instead of just one. Resulting coefficients are then directly comparable for #' untransformed binary predictors. For backward compatibility reasons, #' \code{show.std} also may be a logical value; if \code{TRUE}, normal standardized #' estimates are printed (same effect as \code{show.std = "std"}). Use #' \code{show.std = NULL} (default) or \code{show.std = FALSE}, if no standardization #' is required. #' } #' \subsection{How do I use \code{CSS}-argument?}{ #' With the \code{CSS}-argument, the visual appearance of the tables #' can be modified. To get an overview of all style-sheet-classnames #' that are used in this function, see return value \code{page.style} for details. 
#' Arguments for this list have following syntax: #' \enumerate{ #' \item the class-names with \code{"css."}-prefix as argument name and #' \item each style-definition must end with a semicolon #' } #' You can add style information to the default styles by using a + (plus-sign) as #' initial character for the argument attributes. Examples: #' \itemize{ #' \item \code{css.table = 'border:2px solid red;'} for a solid 2-pixel table border in red. #' \item \code{css.summary = 'font-weight:bold;'} for a bold fontweight in the summary row. #' \item \code{css.lasttablerow = 'border-bottom: 1px dotted blue;'} for a blue dotted border of the last table row. #' \item \code{css.colnames = '+color:green'} to add green color formatting to column names. #' \item \code{css.arc = 'color:blue;'} for a blue text color each 2nd row. #' \item \code{css.caption = '+color:red;'} to add red font-color to the default table caption style. #' } #' } # #' @importFrom dplyr full_join select if_else mutate #' @importFrom purrr reduce map2 map_if map_df compact map_lgl map_chr flatten_chr #' @importFrom sjlabelled response_labels term_labels #' @importFrom sjmisc word_wrap var_rename add_columns add_case #' @importFrom insight model_info is_multivariate find_random get_data find_predictors #' @importFrom performance r2 variance_decomposition #' @importFrom stats nobs setNames #' @importFrom rlang .data #' @importFrom utils packageVersion #' @export tab_model <- function( ..., transform, show.intercept = TRUE, show.est = TRUE, show.ci = .95, show.ci50 = FALSE, show.se = NULL, show.std = NULL, show.p = TRUE, show.stat = FALSE, show.df = FALSE, show.zeroinf = TRUE, show.r2 = TRUE, show.icc = TRUE, show.re.var = TRUE, show.ngroups = TRUE, show.fstat = FALSE, show.aic = FALSE, show.aicc = FALSE, show.dev = FALSE, show.loglik = FALSE, show.obs = TRUE, show.reflvl = FALSE, terms = NULL, rm.terms = NULL, order.terms = NULL, keep = NULL, drop = NULL, title = NULL, pred.labels = NULL, dv.labels = NULL, wrap.labels = 25, bootstrap = FALSE, iterations = 1000, seed = NULL, robust = FALSE, vcov.fun = NULL, vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5", "CR0", "CR1", "CR1p", "CR1S", "CR2", "CR3"), vcov.args = NULL, string.pred = "Predictors", string.est = "Estimate", string.std = "std. Beta", string.ci = "CI", string.se = "std. Error", string.std_se = "standardized std. Error", string.std_ci = "standardized CI", string.p = "p", string.std.p = "std. p", string.df = "df", string.stat = "Statistic", string.std.stat = "std. 
Statistic", string.resp = "Response", string.intercept = "(Intercept)", strings = NULL, ci.hyphen = " – ", minus.sign = "-", collapse.ci = FALSE, collapse.se = FALSE, linebreak = TRUE, col.order = c( "est", "se", "std.est", "std.se", "ci", "std.ci", "ci.inner", "ci.outer", "stat", "std.stat", "p", "std.p", "df.error", "response.level" ), digits = 2, digits.p = 3, digits.rsq = 3, digits.re = 2, emph.p = TRUE, p.val = NULL, df.method = NULL, p.style = c("numeric", "stars", "numeric_stars", "scientific", "scientific_stars"), p.threshold = c(0.05, 0.01, 0.001), p.adjust = NULL, case = "parsed", auto.label = TRUE, prefix.labels = c("none", "varname", "label"), bpe = "median", CSS = css_theme("regression"), file = NULL, use.viewer = TRUE, encoding = "UTF-8" ) { if (!missing(df.method)) { p.val <- df.method } if (!is.null(p.val)) { p.val <- match.arg(p.val, choices = c("wald", "profile", "kenward", "kr", "satterthwaite", "ml1", "betwithin")) } p.style <- match.arg(p.style) prefix.labels <- match.arg(prefix.labels) vcov.type <- match.arg(vcov.type) change_string_est <- !missing(string.est) # if we prefix labels, use different default for case conversion, # else the separating white spaces after colon are removed. if (missing(case)) { if (prefix.labels == "none" && !show.reflvl) case <- "parsed" else case <- NULL } if (p.style == "stars") show.p <- FALSE # default robust? if (isTRUE(robust)) { vcov.type <- "HC3" vcov.fun <- "vcovHC" } # check se-argument vcov.fun <- check_se_argument(se = vcov.fun, type = NULL) models <- list(...) if (length(class(models[[1]])) == 1 && class(models[[1]]) == "list") models <- lapply(models[[1]], function(x) x) names(models) <- unlist(lapply( match.call(expand.dots = F)$`...`, function(.x) deparse(.x, width.cutoff = 500L)) ) auto.transform <- missing(transform) ci.lvl <- ifelse(is.null(show.ci), .95, show.ci) copos <- which("est" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "estimate" copos <- which("se" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.error" copos <- which("ci" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "conf.int" copos <- which("std.est" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.estimate" copos <- which("std.se" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.se" copos <- which("std.ci" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.conf.int" copos <- which("p" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "p.value" copos <- which("std.p" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.p.value" copos <- which("stat" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "statistic" copos <- which("std.stat" == col.order) if (!sjmisc::is_empty(copos)) col.order[copos] <- "std.statistic" # match strings, to label the default strings in the table, # like "Estimate", "CI" etc. 
if (!sjmisc::is_empty(strings) && !is.null(names(strings))) { s.names <- names(strings) if ("pred" %in% s.names) string.pred <- strings[["pred"]] if ("est" %in% s.names) string.est <- strings[["est"]] if ("std" %in% s.names) string.std <- strings[["std"]] if ("ci" %in% s.names) string.ci <- strings[["ci"]] if ("se" %in% s.names) string.se <- strings[["se"]] if ("std_se" %in% s.names) string.std_se <- strings[["std_se"]] if ("std_ci" %in% s.names) string.std_ci <- strings[["std_ci"]] if ("p" %in% s.names) string.p <- strings[["p"]] if ("std.p" %in% s.names) string.std.p <- strings[["std.p"]] if ("df" %in% s.names) string.df <- strings[["df"]] if ("stat" %in% s.names) string.stat <- strings[["stat"]] if ("std.stat" %in% s.names) string.std.stat <- strings[["std.stat"]] if ("resp" %in% s.names) string.resp <- strings[["resp"]] if ("intercept" %in% s.names) string.intercept <- strings[["intercept"]] } model.list <- purrr::map2( models, 1:length(models), function(model, i) { # get info on model family fam.info <- insight::model_info(model) if (insight::is_multivariate(model)) fam.info <- fam.info[[1]] # check whether estimates should be transformed or not if (auto.transform) { if (fam.info$is_linear || identical(fam.info$link_function, "identity")) transform <- NULL else transform <- "exp" } # get tidy output of summary ---- dat <- tidy_model( model = model, ci.lvl = ci.lvl, tf = transform, type = "est", bpe = bpe, robust = list(vcov.fun = vcov.fun, vcov.type = vcov.type, vcov.args = vcov.args), facets = FALSE, show.zeroinf = show.zeroinf, p.val = p.val, bootstrap = bootstrap, iterations = iterations, seed = seed, p_adjust = p.adjust, keep = keep, drop = drop ) # transform estimates if (!is.stan(model) && !is.null(transform)) { funtrans <- match.fun(transform) dat[["estimate"]] <- funtrans(dat[["estimate"]]) dat[["conf.low"]] <- funtrans(dat[["conf.low"]]) dat[["conf.high"]] <- funtrans(dat[["conf.high"]]) dat[["std.error"]] <- dat[["std.error"]] * dat[["estimate"]] } # merge CI columns dat <- dat %>% dplyr::mutate(conf.int = sprintf( "%.*f%s%.*f", digits, .data$conf.low, ci.hyphen, digits, .data$conf.high )) %>% dplyr::select(-.data$conf.low, -.data$conf.high) # get inner probability (i.e. 
2nd CI for Stan-models) ---- if (is.stan(model)) { dat <- dat %>% sjmisc::var_rename(conf.int = "ci.outer") %>% dplyr::mutate(ci.inner = sprintf( "%.*f%s%.*f", digits, .data$conf.low50, ci.hyphen, digits, .data$conf.high50 )) %>% dplyr::select(-.data$conf.low50, -.data$conf.high50) } # tidy output of standardized values ---- if (!is.null(show.std) && !is.stan(model)) { std_method <- switch(show.std, "std" = "refit", "std2" = "2sd", "") tmp_dat <- tidy_model( model = model, ci.lvl = ci.lvl, tf = transform, type = "est", bpe = bpe, robust = list(vcov.fun = vcov.fun, vcov.type = vcov.type, vcov.args = vcov.args), facets = FALSE, show.zeroinf = show.zeroinf, p.val = p.val, p_adjust = p.adjust, standardize = std_method, bootstrap = bootstrap, iterations = iterations, seed = seed, keep = keep, drop = drop ) %>% format_p_values(p.style, digits.p, emph.p, p.threshold) %>% sjmisc::var_rename( estimate = "std.estimate", std.error = "std.se", conf.low = "std.conf.low", conf.high = "std.conf.high", p.value = "std.p.value", statistic = "std.statistic", p.stars = "std.p.stars" ) %>% dplyr::select(-1) # transform estimates if (!is.stan(model) && !is.null(transform)) { funtrans <- match.fun(transform) tmp_dat[["std.estimate"]] <- funtrans(tmp_dat[["std.estimate"]]) tmp_dat[["std.conf.low"]] <- funtrans(tmp_dat[["std.conf.low"]]) tmp_dat[["std.conf.high"]] <- funtrans(tmp_dat[["std.conf.high"]]) tmp_dat[["std.se"]] <- tmp_dat[["std.se"]] * tmp_dat[["std.estimate"]] } dat <- tmp_dat %>% sjmisc::add_columns(dat) %>% dplyr::mutate(std.conf.int = sprintf( "%.*f%s%.*f", digits, .data$std.conf.low, ci.hyphen, digits, .data$std.conf.high )) %>% dplyr::select(-.data$std.conf.low, -.data$std.conf.high) # if t-statistic is the same for standardized and unstandardized model # remove standardized; ignore intercept if (all(round(dat$statistic[-1], 3) == round(dat$std.statistic[-1], 3))) { dat <- dat %>% dplyr::select(-.data$std.statistic, -.data$std.p.value) } } # format p values for unstandardized model dat <- format_p_values(dat, p.style, digits.p, emph.p, p.threshold) # add asterisks to estimates ---- if (grepl("stars", p.style)) { if (obj_has_name(dat, "estimate")) dat$estimate <- sprintf("%.*f %s", digits, dat$estimate, dat$p.stars) if (!show.est && obj_has_name(dat, "std.estimate")) { dat$std.estimate <- sprintf("%.*f %s", digits, dat$std.estimate, dat$std.p.stars) dat <- dplyr::select(dat, -.data$std.p.stars) } } dat <- dplyr::select(dat, -.data$p.stars) # switch column for p-value and conf. int. ---- dat <- dat[, sort_columns(colnames(dat), is.stan(model), col.order)] # add suffix to column names, so we can distinguish models later cn <- colnames(dat)[2:ncol(dat)] colnames(dat)[2:ncol(dat)] <- sprintf("%s_%i", cn, i) # for HTML, convert numerics to character ---- dat <- dat %>% purrr::map_if(is.numeric, ~ sprintf("%.*f", digits, .x)) %>% as.data.frame(stringsAsFactors = FALSE) # remove 2nd HDI if requested ---- if (!show.ci50) dat <- dplyr::select(dat, -string_starts_with("ci.inner", colnames(dat))) ## TODO optionally insert linebreak for new-line-CI / SE # merge estimates and CI / SE columns, if requested ---- if (collapse.ci) { if (linebreak) lb <- "
" else lb <- " " est.cols <- string_starts_with("estimate", x = colnames(dat)) dat[[est.cols]] <- sprintf("%s%s(%s)", dat[[est.cols]], lb, dat[[est.cols + 2]]) # for stan models, we also have 50% HDI if (!sjmisc::is_empty(string_starts_with("ci", x = colnames(dat)))) { if (isTRUE(show.ci50)) { dat <- dplyr::select(dat, -string_starts_with("ci.inner", x = colnames(dat))) dat[[est.cols]] <- sprintf("%s%s(%s)", dat[[est.cols]], lb, dat[[est.cols + 2]]) } dat <- dplyr::select(dat, -string_starts_with("ci.outer", x = colnames(dat))) } else { dat <- dplyr::select(dat, -string_starts_with("conf.int", x = colnames(dat))) } std.cols <- string_starts_with("std.estimate", x = colnames(dat)) if (!sjmisc::is_empty(std.cols)) { dat[[std.cols]] <- sprintf("%s%s(%s)", dat[[std.cols]], lb, dat[[std.cols + 2]]) dat <- dplyr::select(dat, -string_starts_with("std.conf.int", x = colnames(dat))) } } if (collapse.se) { if (linebreak) lb <- "
" else lb <- " " est.cols <- string_starts_with("estimate", x = colnames(dat)) dat[[est.cols]] <- sprintf("%s%s(%s)", dat[[est.cols]], lb, dat[[est.cols + 1]]) dat <- dplyr::select(dat, -string_starts_with("std.error", x = colnames(dat))) std.cols <- string_starts_with("std.estimate", x = colnames(dat)) if (!sjmisc::is_empty(std.cols)) { dat[[std.cols]] <- sprintf("%s%s(%s)", dat[[std.cols]], lb, dat[[std.cols + 1]]) dat <- dplyr::select(dat, -string_starts_with("std.se", x = colnames(dat))) } } # replace minus signs dat[] <- lapply(dat, function(i) gsub("-(\\d)(.*)", paste0(minus.sign, "\\1\\2"), i)) # handle zero-inflation part ---- zidat <- NULL wf <- string_starts_with("wrap.facet", x = colnames(dat)) if (!sjmisc::is_empty(wf)) { zi <- which(dat[[wf]] %in% c("Zero-Inflated Model", "Zero Inflation Model", "zero_inflated", "zi")) if (show.zeroinf && !sjmisc::is_empty(zi)) { zidat <- dat %>% dplyr::slice(!! zi) %>% dplyr::select(!! -wf) } if (!sjmisc::is_empty(zi)) dat <- dplyr::slice(dat, !! -zi) dat <- dplyr::select(dat, !! -wf) } # Add no of observations statistic ---- n_obs <- NULL if (show.obs) { n_obs <- get_observations(model) } vars <- vars_brms <- NULL # extract variance components ---- if ((show.icc || show.re.var || show.r2) && is_mixed_model(model)) { if (inherits(model, "brmsfit")) { vars <- suppressWarnings(insight::get_variance(model)) if (is.null(vars)) { vars_brms <- tryCatch( { performance::variance_decomposition(model) }, error = function(e) { NULL } ) if (!is.null(vars_brms)) { vars$var.intercept <- attr(vars_brms, "var_rand_intercept") vars$var.residual <- attr(vars_brms, "var_residual") } } } else { vars <- suppressWarnings(insight::get_variance(model)) } } else { vars <- NULL } # sanity check for models currently not supported by "get_variance()" if (!is.null(vars) && length(vars) == 1 && is.na(vars)) vars <- NULL # Add ICC statistic ---- icc <- NULL if (show.icc && is_mixed_model(model) && !is.null(vars) && !all(is.na(vars))) { if (inherits(model, "brmsfit") && !is.null(vars_brms)) { icc <- list(icc.adjusted = vars_brms$ICC_decomposed) } else { icc <- list(icc.adjusted = vars$var.random / (vars$var.random + vars$var.residual)) } } # Add r-squared statistic ---- rsq <- NULL if (show.r2 && !insight::is_multivariate(model)) { # if marginal and conditional r-squared already have been computed # via adjusted ICC, use these results and avoid time consuming # multiple computation if (is_mixed_model(model)) { if (inherits(model, "brmsfit")) { rsqdummy <- tryCatch( { suppressWarnings(performance::r2(model)) }, error = function(x) { NULL } ) if (!is.null(rsqdummy)) { rsq <- list( `Marginal R2` = rsqdummy$R2_Bayes_marginal, `Conditional R2` = rsqdummy$R2_Bayes ) } } else if (!is.null(vars)) { if (is.null(vars$var.random)) { rsq <- list( `Marginal R2` = vars$var.fixed / (vars$var.fixed + vars$var.residual), `Conditional R2` = NA ) } else { rsq <- list( `Marginal R2` = vars$var.fixed / (vars$var.fixed + vars$var.random + vars$var.residual), `Conditional R2` = (vars$var.fixed + vars$var.random) / (vars$var.fixed + vars$var.random + vars$var.residual) ) } } } else { rsq <- tryCatch( { suppressWarnings(performance::r2(model)) }, error = function(x) { NULL } ) # fix names of r-squared values if (!is.null(rsq)) { rnames <- sub("_", " ", names(rsq)) names(rsq) <- rnames } } } # Add number of random effect groups ---- n_re_grps <- NULL if (show.ngroups && is_mixed_model(model)) { rand_eff <- insight::get_data(model)[, insight::find_random(model, split_nested = TRUE, flatten = TRUE), 
drop = FALSE] n_re_grps <- sapply(rand_eff, function(.i) length(unique(.i, na.rm = TRUE))) names(n_re_grps) <- sprintf("ngrps.%s", names(n_re_grps)) } # Add deviance and AIC statistic ---- dev <- NULL if (show.dev) dev <- model_deviance(model) aic <- NULL if (show.aic) aic <- model_aic(model) aicc <- NULL if (show.aicc) aicc <- model_aicc(model) loglik <- NULL if (show.loglik) loglik <- model_loglik(model) ## TODO add F-Statistic # fix brms coefficient names if (inherits(model, "brmsfit")) { dat$term <- gsub("^b_", "", dat$term) if (!is.null(zidat)) zidat$term <- gsub("^b_", "", zidat$term) } # check if Intercept should be renamed... if (string.intercept != "(Intercept)") { intercepts <- which(dat$term == "(Intercept)") if (!sjmisc::is_empty(intercepts)) { dat$term[intercepts] <- string.intercept } if (!is.null(zidat)) { intercepts <- which(zidat$term == "(Intercept)") if (!sjmisc::is_empty(intercepts)) { zidat$term[intercepts] <- string.intercept } } } list( dat = dat, transform = transform, zeroinf = zidat, rsq = rsq, n_obs = n_obs, icc = icc, dev = dev, aic = aic, variances = vars, n_re_grps = n_re_grps, loglik = loglik, aicc = aicc ) } ) # join all model data frames and convert to character ---- na.vals <- c( "NA", sprintf("NA%sNA", ci.hyphen), sprintf("NA (NA%sNA)", ci.hyphen), sprintf("NA (NA%sNA) (NA)", ci.hyphen) ) # we have data for fixed effects and zero inflation part as # well as transformation of coefficients in a list, so separate # them out into own objects model.data <- purrr::map(model.list, ~.x[[1]]) transform.data <- purrr::map(model.list, ~.x[[2]]) zeroinf.data <- purrr::map(model.list, ~.x[[3]]) rsq.data <- purrr::map(model.list, ~.x[[4]]) n_obs.data <- purrr::map(model.list, ~.x[[5]]) icc.data <- purrr::map(model.list, ~.x[[6]]) dev.data <- purrr::map(model.list, ~.x[[7]]) aic.data <- purrr::map(model.list, ~.x[[8]]) variance.data <- purrr::map(model.list, ~.x[[9]]) ngrps.data <- purrr::map(model.list, ~.x[[10]]) loglik.data <- purrr::map(model.list, ~.x[[11]]) aicc.data <- purrr::map(model.list, ~.x[[12]]) is.zeroinf <- purrr::map_lgl(model.list, ~ !is.null(.x[[3]])) zeroinf.data <- purrr::compact(zeroinf.data) # make sure we don't have zi-data if not wanted if (!show.zeroinf) zeroinf.data <- NULL # sort multivariate response models by response level model.data <- purrr::map(model.data, function(.x) { resp.col <- string_starts_with("response.level", x = colnames(.x)) if (!sjmisc::is_empty(resp.col)) .x[order(match(.x[[resp.col]], unique(.x[[resp.col]]))), ] else .x }) # if only one multivariate response model, split data # to print models side by side, and update labels of # dependent variables show.response <- TRUE if (length(model.data) == 1) { fi <- insight::model_info(models[[1]]) if (insight::is_multivariate(models[[1]])) fi <- fi[[1]] if (insight::is_multivariate(models[[1]]) || fi$is_categorical) { show.response <- FALSE if (fi$is_categorical) { dv.labels <- sprintf( "%s: %s", insight::find_response(models[[1]]), unique(model.data[[1]][["response.level_1"]]) ) model.data <- split(model.data[[1]], model.data[[1]]["response.level_1"]) } else { dv.labels <- insight::find_response(models[[1]]) model.data <- split(model.data[[1]], model.data[[1]]["response.level_1"]) dv.labels <- dv.labels[match(names(dv.labels), names(model.data))] dv.labels <- sjmisc::word_wrap(dv.labels, wrap = wrap.labels, linesep = "
") } model.data <- purrr::map2(model.data, 1:length(model.data), function(x, y) { colnames(x) <- gsub( pattern = "_1", replacement = sprintf("_%i", y), x = colnames(x) ) x }) } } # Join all models into one data frame, and replace NA by empty strings dat <- model.data %>% purrr::reduce(~ dplyr::full_join(.x, .y, by = "term")) %>% purrr::map_df(~ dplyr::if_else(.x %in% na.vals | is.na(.x), "", .x)) # remove unwanted columns and rows ---- dat <- remove_unwanted( dat, show.intercept, show.est, show.std, show.ci, show.se, show.stat, show.p, show.df, show.response, terms, rm.terms ) # same for zero-inflated parts ---- zeroinf <- NULL if (!sjmisc::is_empty(zeroinf.data)) { zeroinf <- zeroinf.data %>% purrr::reduce(~ dplyr::full_join(.x, .y, by = "term")) %>% purrr::map_df(~ dplyr::if_else(.x %in% na.vals | is.na(.x), "", .x)) zeroinf <- remove_unwanted( zeroinf, show.intercept, show.est, show.std, show.ci, show.se, show.stat, show.p, show.df, show.response, terms, rm.terms ) } # get default labels for dv and terms ---- if (isTRUE(auto.label) && sjmisc::is_empty(pred.labels)) { if (.labelled_model_data(models) || any(sapply(models, is.stan)) || isTRUE(show.reflvl)) { pred.labels <- sjlabelled::term_labels(models, case = case, mark.cat = TRUE, prefix = prefix.labels) category.values <- attr(pred.labels, "category.value") # remove random effect labels re_terms <- unlist(sapply( models, insight::find_predictors, effects = "random", component = "all", flatten = TRUE )) if (!is.null(re_terms)) { pred.labels.tmp <- sjlabelled::term_labels(models, case = case, mark.cat = TRUE, prefix = "varname") for (.re in re_terms) { found <- grepl(paste0("^", .re, ":"), pred.labels.tmp) if (any(found)) { pred.labels <- pred.labels[!found] category.values <- category.values[!found] pred.labels.tmp <- pred.labels.tmp[!found] } } } no.dupes <- !duplicated(names(pred.labels)) pred.labels <- prepare.labels( x = pred.labels[no.dupes], grp = show.reflvl, categorical = category.values[no.dupes], models = models ) } else { pred.labels <- NULL for (pl_counter in 1:length(models)) { pred.labels <- c(pred.labels, parameters::format_parameters(models[[pl_counter]])) } pred.labels <- pred.labels[!duplicated(names(pred.labels))] show.reflvl <- FALSE } } else { # no automatic grouping of table rows for categorical variables # when user supplies own labels show.reflvl <- FALSE } # to insert "header" rows for categorical variables, we need to # save the original term names first. # remember.terms <- dat$term # named vector for predictor labels means we try to match labels # with model terms if (!sjmisc::is_empty(pred.labels)) { if (!is.null(names(pred.labels))) { labs <- sjmisc::word_wrap(pred.labels, wrap = wrap.labels, linesep = "
") if (show.reflvl) { pl <- pred.labels dupes <- which(pred.labels == names(pred.labels)) if (!sjmisc::is_empty(dupes)) pl <- pl[-dupes] dat <- merge(dat, data.frame(term = names(pl)), by = "term", all = TRUE) # resort, in case reference level is alphabetically after other categories found <- match(names(pl), dat$term) dat[sort(found), ] <- dat[found, ] refs <- is.na(dat[, 2]) } else { refs <- NULL } # some labels may not match. in this case, we only need to replace those # elements in the vector that match a specific label, but # at the correct position inside "dat$term" tr <- 1:nrow(dat) find.matches <- match(dat$term, names(pred.labels)) find.na <- which(is.na(find.matches)) if (!sjmisc::is_empty(find.na)) tr <- tr[-find.na] rp <- as.vector(stats::na.omit(find.matches)) dat$term[tr] <- unname(labs[rp]) if (!is.null(refs)) { dat[refs, 2:ncol(dat)] <- "" est.cols <- if (show.est) grepl("^estimate", colnames(dat)) else if (show.std) grepl("^std.estimate", colnames(dat)) else NULL if (!is.null(est.cols)) dat[refs, est.cols] <- "Reference" } # also label zero-inflated part if (!is.null(zeroinf)) { tr <- 1:nrow(zeroinf) find.matches <- match(zeroinf$term, names(pred.labels)) find.na <- which(is.na(find.matches)) if (!sjmisc::is_empty(find.na)) tr <- tr[-find.na] rp <- as.vector(stats::na.omit(find.matches)) zeroinf$term[tr] <- unname(labs[rp]) } } else { if (length(pred.labels) == nrow(dat)) dat$term <- pred.labels else message("Length of `pred.labels` does not equal number of predictors, no labelling applied.") } } if (isTRUE(auto.label) && is.null(dv.labels)) { dv.labels <- sjmisc::word_wrap( sjlabelled::response_labels(models, case = case), wrap = wrap.labels, linesep = "
" ) } else if (is.null(dv.labels)) { dv.labels <- purrr::map(models, insight::find_response) %>% purrr::flatten_chr() } # does user want a specific order for terms? if (!is.null(order.terms)) { if (length(order.terms) == nrow(dat)) { dat <- dat[order.terms, ] } else { message("Number of values in `order.terms` does not match number of terms. Terms are not sorted.") } } # get proper column header labels ---- col.header <- purrr::map_chr(colnames(dat), function(x) { pos <- grep("^estimate_", x) if (!sjmisc::is_empty(pos)) { i <- as.numeric(sub("estimate_", "", x = x, fixed = T)) if (insight::is_multivariate(models[[1]])) mr <- i else mr <- NULL if (change_string_est && !sjmisc::is_empty(string.est)) { x <- string.est } else if (i <= length(models)) { x <- estimate_axis_title( models[[i]], axis.title = NULL, type = "est", transform = transform.data[[i]], multi.resp = mr, include.zeroinf = FALSE ) } else if (length(models) == 1) { x <- estimate_axis_title( models[[1]], axis.title = NULL, type = "est", transform = transform.data[[1]], multi.resp = mr, include.zeroinf = FALSE ) } else { x <- string.est } } pos <- grep("^term", x) if (!sjmisc::is_empty(pos)) x <- string.pred pos <- grep("^conf.int", x) if (!sjmisc::is_empty(pos)) x <- string.ci pos <- grep("^std.error", x) if (!sjmisc::is_empty(pos)) x <- string.se pos <- grep("^std.estimate", x) if (!sjmisc::is_empty(pos)) x <- string.std pos <- grep("^std.se", x) if (!sjmisc::is_empty(pos)) x <- string.std_se pos <- grep("^std.conf.int", x) if (!sjmisc::is_empty(pos)) x <- string.std_ci pos <- grep("^p.value", x) if (!sjmisc::is_empty(pos)) x <- string.p pos <- grep("^std.p.value", x) if (!sjmisc::is_empty(pos)) x <- string.std.p pos <- grep("^df", x) if (!sjmisc::is_empty(pos)) x <- string.df pos <- grep("^statistic", x) if (!sjmisc::is_empty(pos)) x <- string.stat pos <- grep("^std.statistic", x) if (!sjmisc::is_empty(pos)) x <- string.std.stat pos <- grep("^response.level", x) if (!sjmisc::is_empty(pos)) x <- string.resp pos <- grep("^ci.inner", x) if (!sjmisc::is_empty(pos)) x <- "CI (50%)" pos <- grep("^ci.outer", x) if (!sjmisc::is_empty(pos)) x <- sprintf("CI (%i%%)", round(100 * show.ci)) x }) if (grepl("stars", p.style)) footnote <- sprintf( "* p<%s   ** p<%s   *** p<%s", format(p.threshold[1]), format(p.threshold[2]), format(p.threshold[3]) ) else footnote <- NULL tab_model_df( x = dat, zeroinf = zeroinf, is.zeroinf = is.zeroinf, title = title, col.header = col.header, dv.labels = dv.labels, rsq.list = rsq.data, n_obs.list = n_obs.data, icc.list = icc.data, dev.list = dev.data, aic.list = aic.data, aicc.list = aicc.data, variance.list = variance.data, ngrps.list = ngrps.data, loglik.list = loglik.data, n.models = length(model.list), show.re.var = show.re.var, show.icc = show.icc, CSS = CSS, file = file, use.viewer = use.viewer, footnote = footnote, digits.rsq = digits.rsq, digits.re = digits.re, encoding = encoding ) } #' @importFrom stats na.omit sort_columns <- function(x, is.stan, col.order) { ## TODO check code for multiple response models ## TODO allow custom sorting reihe <- c( "term", "estimate", "std.error", "std.estimate", "std.se", "conf.int", "std.conf.int", "ci.inner", "ci.outer", "statistic", "p.value", "df.error", "wrap.facet", "response.level" ) # fix args if (sjmisc::is_empty(col.order)) col.order <- reihe if (col.order[1] != "term") col.order <- c("term", col.order) if (!("wrap.facet" %in% col.order)) col.order <- c(col.order, "wrap.facet") if (is.stan) { pcol <- which(col.order == "p.value") if 
(!sjmisc::is_empty(pcol)) col.order <- col.order[-pcol] } as.vector(stats::na.omit(match(col.order, x))) } #' @importFrom dplyr select slice remove_unwanted <- function(dat, show.intercept, show.est, show.std, show.ci, show.se, show.stat, show.p, show.df, show.response, terms, rm.terms) { if (!show.intercept) { ints1 <- string_contains("(Intercept", x = dat$term) ints2 <- string_contains("b_Intercept", x = dat$term) ints3 <- string_contains("b_zi_Intercept", x = dat$term) ints4 <- which(dat$term %in% "Intercept") ints <- c(ints1, ints2, ints3, ints4) if (!sjmisc::is_empty(ints)) dat <- dplyr::slice(dat, !! -ints) } if (show.est == FALSE) { dat <- dplyr::select( dat, -string_starts_with("estimate", x = colnames(dat)), -string_starts_with("conf", x = colnames(dat)), -string_starts_with("std.error", x = colnames(dat)) ) } if (is.null(show.std) || show.std == FALSE) { dat <- dplyr::select(dat, -string_starts_with("std.estimate", x = colnames(dat))) } if (is.null(show.ci) || show.ci == FALSE) { dat <- dplyr::select( dat, -string_starts_with("conf", x = colnames(dat)), -string_starts_with("std.conf", x = colnames(dat)), -string_starts_with("ci", x = colnames(dat)) ) } if (is.null(show.se) || show.se == FALSE) { dat <- dplyr::select( dat, -string_starts_with("std.error", x = colnames(dat)), -string_starts_with("std.se", x = colnames(dat)) ) } if (show.stat == FALSE) { dat <- dplyr::select(dat, -string_starts_with("statistic", x = colnames(dat)), -string_starts_with("std.statistic", x = colnames(dat))) } if (show.response == FALSE) { dat <- dplyr::select(dat, -string_starts_with("response.level", x = colnames(dat))) } if (show.p == FALSE) { dat <- dplyr::select(dat, -string_starts_with("p.value", x = colnames(dat)), -string_starts_with("std.p.value", x = colnames(dat))) } if (show.df == FALSE) { dat <- dplyr::select(dat, -string_starts_with("df", x = colnames(dat))) } if (!is.null(terms)) { terms <- parse_terms(terms) keep_terms <- which(dat$term %in% terms) dat <- dplyr::slice(dat, !! keep_terms) } if (!is.null(rm.terms)) { rm.terms <- parse_terms(rm.terms) keep_terms <- which(!(dat$term %in% rm.terms)) dat <- dplyr::slice(dat, !! 
keep_terms) } dat } prepare.labels <- function(x, grp, categorical, models) { # remove variable names from factor is ref levels are shown if (grp) { for (i in models) { f <- names(which(sapply(insight::get_data(i), is.factor))) remove <- names(x) %in% f if (any(remove)) { x <- x[!remove] categorical <- categorical[!remove] } } } x_var <- names(x[!categorical]) x_val <- names(x[categorical]) for (i in x_var) { pos <- string_starts_with(i, x = x_val) if (!grp || (length(pos) > 0 && length(pos) < 3)) { match.vals <- x_val[pos] x[match.vals] <- sprintf("%s: %s", x[i], x[match.vals]) } } x } format_p_values <- function(dat, p.style, digits.p, emph.p, p.threshold){ # get stars and significance at alpha = 0.05 ---- dat <- dat %>% dplyr::mutate( p.stars = get_p_stars(.data$p.value, p.threshold), p.sig = .data$p.value < .05 ) # scientific notation ---- if (grepl("scientific", p.style)) { dat$p.value <- formatC(dat$p.value, format = "e", digits = digits.p) } else { dat$p.value <- sprintf("%.*f", digits.p, dat$p.value) } # emphasize p-values ---- if (emph.p && !all(dat$p.value == "NA")) dat$p.value[which(dat$p.sig)] <- sprintf("%s", dat$p.value[which(dat$p.sig)]) dat <- dplyr::select(dat, -.data$p.sig) # indicate p <0.001 ---- pv <- paste0("0.", paste(rep("0", digits.p), collapse = "")) dat$p.value[dat$p.value == pv] <- "<0.001" pv <- paste0("0.", paste(rep("0", digits.p), collapse = ""), "") dat$p.value[dat$p.value == pv] <- "<0.001" dat } sjPlot/R/plot_frq.R0000644000176200001440000006341014104233216013677 0ustar liggesusersutils::globalVariables("density") #' @title Plot frequencies of variables #' @name plot_frq #' #' @description Plot frequencies of a variable as bar graph, histogram, box plot etc. #' #' @note This function only works with variables with integer values (or numeric #' factor levels), i.e. scales / centered variables #' with fractional part may result in unexpected behaviour. #' #' @param ... Optional, unquoted names of variables that should be selected for #' further processing. Required, if \code{data} is a data frame (and no #' vector) and only selected variables from \code{data} should be processed. #' You may also use functions like \code{:} or tidyselect's #' select_helpers. #' @param sort.frq Determines whether categories should be sorted #' according to their frequencies or not. Default is \code{"none"}, so #' categories are not sorted by frequency. Use \code{"asc"} or #' \code{"desc"} for sorting categories ascending or descending order. #' @param geom.colors User defined color for geoms, e.g. \code{geom.colors = "#0080ff"}. #' @param errorbar.color Color of confidence interval bars (error bars). #' Only applies to \code{type = "bar"}. In case of dot plots, error bars #' will have same colors as dots (see \code{geom.colors}). #' @param show.mean Logical, if \code{TRUE}, a vertical line in histograms #' is drawn to indicate the mean value of the variables. Only #' applies to histogram-charts. #' @param show.mean.val Logical, if \code{TRUE} (default), the mean value #' is printed to the vertical line that indicates the variable's #' mean. Only applies to histogram-charts. #' @param show.sd Logical, if \code{TRUE}, the standard deviation #' is annotated as shaded rectangle around the mean intercept #' line. Only applies to histogram-charts. #' @param mean.line.type Numeric value, indicating the linetype of the mean #' intercept line. Only applies to histogram-charts and #' when \code{show.mean = TRUE}. #' @param mean.line.size Numeric, size of the mean intercept line. 
Only #' applies to histogram-charts and when \code{show.mean = TRUE}. #' @param normal.curve Logical, if \code{TRUE}, a normal curve, which is adjusted to the data, #' is plotted over the histogram or density plot. Default is #' \code{FALSE}. Only applies when histograms or density plots are plotted (see \code{type}). #' @param normal.curve.color Color of the normal curve line. Only #' applies if \code{normal.curve = TRUE}. #' @param normal.curve.size Numeric, size of the normal curve line. Only #' applies if \code{normal.curve = TRUE}. #' @param normal.curve.alpha Transparancy level (alpha value) of the normal curve. Only #' applies if \code{normal.curve = TRUE}. #' @param xlim Numeric vector of length two, defining lower and upper axis limits #' of the x scale. By default, this argument is set to \code{NULL}, i.e. the #' x-axis fits to the required range of the data. #' @param axis.title Character vector of length one or two (depending on #' the plot function and type), used as title(s) for the x and y axis. #' If not specified, a default labelling is chosen. #' \strong{Note:} Some plot types do not support this argument. In such #' cases, use the return value and add axis titles manually with #' \code{\link[ggplot2]{labs}}, e.g.: \code{$plot.list[[1]] + labs(x = ...)} #' #' @inheritParams plot_scatter #' @inheritParams plot_grpfrq #' @inheritParams tab_xtab #' #' @return A ggplot-object. #' #' @examples #' library(sjlabelled) #' data(efc) #' data(iris) #' #' # simple plots, two different notations #' plot_frq(iris, Species) #' plot_frq(efc$tot_sc_e) #' #' # boxplot #' plot_frq(efc$e17age, type = "box") #' #' if (require("dplyr")) { #' # histogram, pipe-workflow #' efc %>% #' dplyr::select(e17age, c160age) %>% #' plot_frq(type = "hist", show.mean = TRUE) #' #' # bar plot(s) #' plot_frq(efc, e42dep, c172code) #' } #' #' if (require("dplyr") && require("gridExtra")) { #' # grouped data frame, all panels in one plot #' efc %>% #' group_by(e42dep) %>% #' plot_frq(c161sex) %>% #' plot_grid() #' } #' #' \donttest{ #' library(sjmisc) #' # grouped variable #' ageGrp <- group_var(efc$e17age) #' ageGrpLab <- group_labels(efc$e17age) #' plot_frq(ageGrp, title = get_label(efc$e17age), axis.labels = ageGrpLab) #' #' # plotting confidence intervals. 
expand grid and v/hjust for text labels #' plot_frq( #' efc$e15relat, type = "dot", show.ci = TRUE, sort.frq = "desc", #' coord.flip = TRUE, expand.grid = TRUE, vjust = "bottom", hjust = "left" #' ) #' #' # histogram with overlayed normal curve #' plot_frq(efc$c160age, type = "h", show.mean = TRUE, show.mean.val = TRUE, #' normal.curve = TRUE, show.sd = TRUE, normal.curve.color = "blue", #' normal.curve.size = 3, ylim = c(0,50)) #' } #' @import ggplot2 #' @importFrom sjstats weighted_sd #' @importFrom sjmisc group_labels group_var to_value frq #' @importFrom sjlabelled set_labels drop_labels #' @importFrom stats na.omit sd weighted.mean dnorm #' @importFrom rlang .data #' @export plot_frq <- function(data, ..., title = "", weight.by = NULL, title.wtd.suffix = NULL, sort.frq = c("none", "asc", "desc"), type = c("bar", "dot", "histogram", "line", "density", "boxplot", "violin"), geom.size = NULL, geom.colors = "#336699", errorbar.color = "darkred", axis.title = NULL, axis.labels = NULL, xlim = NULL, ylim = NULL, wrap.title = 50, wrap.labels = 20, grid.breaks = NULL, expand.grid = FALSE, show.values = TRUE, show.n = TRUE, show.prc = TRUE, show.axis.values = TRUE, show.ci = FALSE, show.na = FALSE, show.mean = FALSE, show.mean.val = TRUE, show.sd = TRUE, drop.empty = TRUE, mean.line.type = 2, mean.line.size = 0.5, inner.box.width = 0.15, inner.box.dotsize = 3, normal.curve = FALSE, normal.curve.color = "red", normal.curve.size = 0.8, normal.curve.alpha = 0.4, auto.group = NULL, coord.flip = FALSE, vjust = "bottom", hjust = "center", y.offset = NULL) { # Match arguments ----- type <- match.arg(type) sort.frq <- match.arg(sort.frq) plot_data <- get_dplyr_dot_data(data, dplyr::quos(...)) if (!is.data.frame(plot_data)) { plot_data <- data.frame(plot_data, stringsAsFactors = FALSE) colnames(plot_data) <- deparse(substitute(data)) } pl <- NULL if (inherits(plot_data, "grouped_df")) { # get grouped data grps <- get_grouped_data(plot_data) # now plot everything for (i in seq_len(nrow(grps))) { # copy back labels to grouped data frame tmp <- sjlabelled::copy_labels(grps$data[[i]], data) # prepare argument list, including title tmp.title <- get_grouped_plottitle(plot_data, grps, i, sep = "\n") # plot plots <- lapply(colnames(tmp), function(.d) { plot_frq_helper( var.cnt = tmp[[.d]], title = tmp.title, weight.by = weight.by, title.wtd.suffix, sort.frq, type, geom.size, geom.colors, errorbar.color, axis.title, axis.labels, xlim, ylim, wrap.title, wrap.labels, grid.breaks, expand.grid, show.values, show.n, show.prc, show.axis.values, show.ci, show.na, show.mean, show.mean.val, show.sd, drop.empty, mean.line.type, mean.line.size, inner.box.width, inner.box.dotsize, normal.curve, normal.curve.color, normal.curve.size, normal.curve.alpha, auto.group, coord.flip, vjust, hjust, y.offset, var.name = .d ) }) # add plots, check for NULL results pl <- c(pl, plots) } } else { pl <- lapply(colnames(plot_data), function(.d) { plot_frq_helper( var.cnt = plot_data[[.d]], title, weight.by = weight.by, title.wtd.suffix, sort.frq, type, geom.size, geom.colors, errorbar.color, axis.title, axis.labels, xlim, ylim, wrap.title, wrap.labels, grid.breaks, expand.grid, show.values, show.n, show.prc, show.axis.values, show.ci, show.na, show.mean, show.mean.val, show.sd, drop.empty, mean.line.type, mean.line.size, inner.box.width, inner.box.dotsize, normal.curve, normal.curve.color, normal.curve.size, normal.curve.alpha, auto.group, coord.flip, vjust, hjust, y.offset, var.name = .d ) }) if (length(pl) == 1) pl <- pl[[1]] } pl } 
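# A minimal, self-contained sketch (illustration only, not used by the package; the
# actual computation happens inline in plot_frq_helper() below) of how plot_frq()
# derives Wald-type confidence intervals for category frequencies: the relative
# frequency p = n_k / N gets the normal-approximation interval
# p +/- 1.96 * sqrt(p * (1 - p) / N), which is then rescaled to the count scale.
# The function name `.frq_ci_sketch` is invented for this illustration.
.frq_ci_sketch <- function(counts) {
  counts <- as.numeric(counts)
  total_n <- sum(counts)
  rel_frq <- counts / total_n
  ci <- 1.96 * sqrt(rel_frq * (1 - rel_frq) / total_n)
  data.frame(
    frq = counts,
    lower.ci = total_n * (rel_frq - ci),
    upper.ci = total_n * (rel_frq + ci)
  )
}
# Example: .frq_ci_sketch(table(mtcars$cyl)) returns the counts together with their
# approximate 95% confidence bounds on the count scale.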
plot_frq_helper <- function( var.cnt, title, weight.by, title.wtd.suffix, sort.frq, type, geom.size, geom.colors, errorbar.color, axis.title, axis.labels, xlim, ylim, wrap.title, wrap.labels, grid.breaks, expand.grid, show.values, show.n, show.prc, show.axis.values, show.ci, show.na, show.mean, show.mean.val, show.sd, drop.empty, mean.line.type, mean.line.size, inner.box.width, inner.box.dotsize, normal.curve, normal.curve.color, normal.curve.size, normal.curve.alpha, auto.group, coord.flip, vjust, hjust, y.offset, var.name = NULL) { # remove empty value-labels if (drop.empty) { var.cnt <- sjlabelled::drop_labels(var.cnt) } # try to find some useful default offsets for textlabels, # depending on plot range and flipped coordinates if (is.null(y.offset)) { # get maximum y-pos y.offset <- ceiling(max(table(var.cnt)) / 100) if (coord.flip) { if (missing(vjust)) vjust <- "center" if (missing(hjust)) hjust <- "bottom" if (hjust == "bottom") y_offset <- y.offset else if (hjust == "top") y_offset <- -y.offset else y_offset <- 0 } else { if (vjust == "bottom") y_offset <- y.offset else if (vjust == "top") y_offset <- -y.offset else y_offset <- 0 } } else { y_offset <- y.offset } if (is.null(axis.title)) axis.title <- sjlabelled::get_label(var.cnt, def.value = var.name) if (is.null(title)) title <- sjlabelled::get_label(var.cnt, def.value = var.name) # remove titles if empty if (!is.null(axis.title) && axis.title == "") axis.title <- NULL if (!is.null(title) && title == "") title <- NULL # check color argument if (length(geom.colors) > 1) geom.colors <- geom.colors[1] # default grid-expansion if (isTRUE(expand.grid) || (missing(expand.grid) && type == "histogram")) { expand.grid <- waiver() } else { expand.grid <- c(0, 0) } # for histograms or density plots... xv <- sjmisc::to_value(stats::na.omit(var.cnt)) # check for nice bin-width defaults if (type %in% c("histogram", "density") && !is.null(geom.size) && geom.size < round(diff(range(xv)) / 40)) message("Using very small binwidth. Consider adjusting `geom.size` argument.") # create second data frame hist.dat <- data.frame(xv) # check default geom.size ----- if (is.null(geom.size)) { geom.size <- dplyr::case_when( type == "bar" ~ .7, type == "dot" ~ 2.5, type == "density" ~ ceiling(diff(range(xv)) / 40), type == "histogram" ~ ceiling(diff(range(xv)) / 40), type == "line" ~ .8, type == "boxplot" ~ .3, type == "violin" ~ .3, TRUE ~ .7 ) } # check whether variable should be auto-grouped ----- if (!is.null(auto.group) && length(unique(var.cnt)) >= auto.group) { message(sprintf( "`%s` has %i unique values and was grouped...", var.name, length(unique(var.cnt)) )) } if (!is.null(weight.by)) { dat <- data.frame( var.cnt = var.cnt, weight.by = weight.by, stringsAsFactors = FALSE ) } else { dat <- data.frame( var.cnt = var.cnt, stringsAsFactors = FALSE ) } # create frequency data frame ----- df.frq <- suppressMessages(sjmisc::frq( x = dat, "var.cnt", sort.frq = sort.frq, weights = "weight.by", auto.grp = auto.group, show.na = show.na )) mydat <- df.frq[[1]] # remove empty if (drop.empty) mydat <- mydat[mydat$frq > 0, ] # add confindence intervals for frequencies total_n = sum(mydat$frq) rel_frq <- as.numeric(mydat$frq / total_n) ci <- 1.96 * suppressWarnings(sqrt(rel_frq * (1 - rel_frq) / total_n)) mydat$upper.ci <- total_n * (rel_frq + ci) mydat$lower.ci <- total_n * (rel_frq - ci) mydat$rel.upper.ci <- rel_frq + ci mydat$rel.lower.ci <- rel_frq - ci # any labels detected? 
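  # (if the frequency table carries value labels and no user-defined axis.labels
  # were supplied, those labels are used for the x-axis; otherwise the raw values
  # serve as axis labels)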
if (!is.null(mydat$label) && is.null(axis.labels) && !all(stats::na.omit(mydat$label) == "")) axis.labels <- mydat$label else if (is.null(axis.labels)) axis.labels <- mydat$val # wrap labels axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # define text label position if (show.ci) mydat$label.pos <- mydat$upper.ci else mydat$label.pos <- mydat$frq # Trim labels and title to appropriate size ----- # check length of diagram title and split longer string into new lines # every 50 chars if (!is.null(title)) { # if we have weighted values, say that in diagram's title if (!is.null(title.wtd.suffix)) title <- paste(title, title.wtd.suffix, sep = "") title <- sjmisc::word_wrap(title, wrap.title) } # check length of x-axis title and split longer string into new lines # every 50 chars if (!is.null(axis.title)) axis.title <- sjmisc::word_wrap(axis.title, wrap.title) # count variable may not be a factor! if (is.factor(var.cnt) || is.character(var.cnt)) { var.cnt <- sjmisc::to_value(var.cnt, keep.labels = F) } # If we have a histogram, caluclate means of groups if (is.null(weight.by)) { mittelwert <- mean(var.cnt, na.rm = TRUE) stddev <- stats::sd(var.cnt, na.rm = TRUE) } else { mittelwert <- stats::weighted.mean(var.cnt, weight.by, na.rm = TRUE) stddev <- sjstats::weighted_sd(var.cnt, weights = weight.by) } # If we have boxplots, use different data frame structure if (type == "boxplot" || type == "violin") { mydat <- stats::na.omit(data.frame(cbind( grp = 1, frq = var.cnt, val = var.cnt ))) mydat$grp <- as.factor(mydat$grp) } # Prepare bar charts trimViolin <- FALSE lower_lim <- 0 # calculate upper y-axis-range # if we have a fixed value, use this one here if (!is.null(ylim) && length(ylim) == 2) { lower_lim <- ylim[1] upper_lim <- ylim[2] } else { # if we have boxplots, we have different ranges, so we can adjust # the y axis if (type == "boxplot" || type == "violin") { # use an extra standard-deviation as limits for the y-axis when we have boxplots lower_lim <- min(var.cnt, na.rm = TRUE) - floor(stats::sd(var.cnt, na.rm = TRUE)) upper_lim <- max(var.cnt, na.rm = TRUE) + ceiling(stats::sd(var.cnt, na.rm = TRUE)) # make sure that the y-axis is not below zero if (lower_lim < 0) { lower_lim <- 0 trimViolin <- TRUE } } else if (type == "histogram") { # what is the maximum values after binning for histograms? hist.grp.cnt <- ceiling(diff(range(var.cnt, na.rm = T)) / geom.size) # ... or the amount of max. 
answers per category # add 10% margin to upper limit upper_lim <- max(pretty(table( sjmisc::group_var( var.cnt, size = "auto", n = hist.grp.cnt, append = FALSE ) ) * 1.1)) } else { if (show.ci) upper_lim <- max(pretty(mydat$upper.ci * 1.1)) else upper_lim <- max(pretty(mydat$frq * 1.1)) } } # If we want to include NA, use raw percentages as valid percentages if (show.na) mydat$valid.prc <- mydat$raw.prc # don't display value labels when we have boxplots or violin plots if (type == "boxplot" || type == "violin") show.values <- FALSE if (show.values) { # here we have counts and percentages if (show.prc && show.n) { if (coord.flip) { ggvaluelabels <- geom_text( label = sprintf("%i (%.01f%%)", mydat$frq, mydat$valid.prc), hjust = hjust, vjust = vjust, aes(y = .data$label.pos + y_offset) ) } else { ggvaluelabels <- geom_text( label = sprintf("%i\n(%.01f%%)", mydat$frq, mydat$valid.prc), hjust = hjust, vjust = vjust, aes(y = .data$label.pos + y_offset) ) } } else if (show.n) { # here we have counts, without percentages ggvaluelabels <- geom_text( label = sprintf("%i", mydat$frq), hjust = hjust, vjust = vjust, aes(y = .data$label.pos + y_offset) ) } else if (show.prc) { # here we have counts, without percentages ggvaluelabels <- geom_text( label = sprintf("%.01f%%", mydat$valid.prc), hjust = hjust, vjust = vjust, aes(y = .data$label.pos + y_offset) ) } else { # no labels ggvaluelabels <- geom_text(aes(y = .data$frq), label = "") } } else { # no labels ggvaluelabels <- geom_text(aes(y = .data$frq), label = "") } # Set up grid breaks maxx <- if (is.numeric(mydat$val)) max(mydat$val) + 1 else nrow(mydat) if (is.null(grid.breaks)) { gridbreaks <- waiver() histgridbreaks <- waiver() } else { gridbreaks <- c(seq(lower_lim, upper_lim, by = grid.breaks)) histgridbreaks <- c(seq(lower_lim, maxx, by = grid.breaks)) } # set Y-axis, depending on the calculated upper y-range. # It either corresponds to the maximum amount of cases in the data set # (length of var) or to the highest count of var's categories. if (show.axis.values) { yscale <- scale_y_continuous( limits = c(lower_lim, upper_lim), expand = expand.grid, breaks = gridbreaks ) } else { yscale <- scale_y_continuous( limits = c(lower_lim, upper_lim), expand = expand.grid, breaks = gridbreaks, labels = NULL ) } # bar and dot plot start here! ----- if (type == "bar" || type == "dot") { # define geom if (type == "bar") { geob <- geom_bar(stat = "identity", width = geom.size, fill = geom.colors) } else if (type == "dot") { geob <- geom_point(size = geom.size, colour = geom.colors) } # as factor, but preserve order mydat$val <- factor(mydat$val, levels = unique(mydat$val)) # mydat is a data frame that only contains one variable (var). # Must be declared as factor, so the bars are central aligned to # each x-axis-break. baseplot <- ggplot(mydat, aes(x = .data$val, y = .data$frq)) + geob + yscale + # remove guide / legend guides(fill = "none") + # show absolute and percentage value of each bar. ggvaluelabels + # print value labels to the x-axis. # If argument "axis.labels" is NULL, the category numbers (1 to ...) # appear on the x-axis scale_x_discrete(labels = axis.labels) # add error bars if (show.ci) { ebcol <- ifelse(type == "dot", geom.colors, errorbar.color) # print confidence intervalls (error bars) baseplot <- baseplot + geom_errorbar(aes_string(ymin = "lower.ci", ymax = "upper.ci"), colour = ebcol, width = 0) } # check whether coordinates should be flipped, i.e. 
# swap x and y axis if (coord.flip) baseplot <- baseplot + coord_flip() # Start box plot here ----- } else if (type == "boxplot" || type == "violin") { # setup base plot baseplot <- ggplot(mydat, aes_string(x = "grp", y = "frq")) # and x-axis scalex <- scale_x_discrete(labels = "") if (type == "boxplot") { baseplot <- baseplot + geom_boxplot(width = geom.size, fill = geom.colors, notch = show.ci) } else { baseplot <- baseplot + geom_violin(trim = trimViolin, width = geom.size, fill = geom.colors) # if we have a violin plot, add an additional boxplot inside to show # more information if (show.ci) { baseplot <- baseplot + geom_boxplot(width = inner.box.width, fill = "white", notch = TRUE) } else { baseplot <- baseplot + geom_boxplot(width = inner.box.width, fill = "white") } } # if we have boxplots or violon plots, also add a point that indicates # the mean value # different fill colours, because violin boxplots have white background fcsp <- ifelse(type == "boxplot", "white", "black") baseplot <- baseplot + stat_summary(fun.y = "mean", geom = "point", shape = 21, size = inner.box.dotsize, fill = fcsp) # no additional labels for the x- and y-axis, only diagram title baseplot <- baseplot + yscale + scalex # Start density plot here ----- } else if (type == "density") { # First, plot histogram with density curve baseplot <- ggplot(hist.dat, aes(x = .data$xv)) + geom_histogram(aes(y = stat(density)), binwidth = geom.size, fill = geom.colors) + # transparent density curve above bars geom_density(aes(y = stat(density)), fill = "cornsilk", alpha = 0.3) + # remove margins from left and right diagram side scale_x_continuous(expand = expand.grid, breaks = histgridbreaks, limits = xlim) # check whether user wants to overlay the histogram # with a normal curve if (normal.curve) { baseplot <- baseplot + stat_function( fun = dnorm, args = list( mean = mean(hist.dat$xv), sd = stats::sd(hist.dat$xv) ), colour = normal.curve.color, size = normal.curve.size, alpha = normal.curve.alpha ) } } else { # Since the density curve shows no absolute numbers (counts) on the # y-axis, have also the opportunity to plot "real" histrograms with # counts on the y-axis if (type == "histogram") { # original data needed for normal curve baseplot <- ggplot(mydat) + # second data frame mapped to the histogram geom geom_histogram(data = hist.dat, aes(x = .data$xv), binwidth = geom.size, fill = geom.colors) } else { baseplot <- ggplot(mydat, aes(x = .data$val, y = .data$frq)) + geom_area(alpha = 0.3) + geom_line(size = geom.size, colour = geom.colors) + ggvaluelabels } # check whether user wants to overlay the histogram # with a normal curve if (normal.curve) { baseplot <- baseplot + stat_function( fun = function(xx, mean, sd, n) { n * stats::dnorm(x = xx, mean = mean, sd = sd) }, args = with(mydat, c( mean = mittelwert, sd = stddev, n = length(var.cnt) )), colour = normal.curve.color, size = normal.curve.size, alpha = normal.curve.alpha ) } # if we have a histogram, add mean-lines if (show.mean) { baseplot <- baseplot + # vertical lines indicating the mean geom_vline(xintercept = mittelwert, linetype = mean.line.type, size = mean.line.size) # check whether meanvalue should be shown. 
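  # (the mean/sd annotation below uses plotmath via `parse = TRUE`, so the label is
  # rendered as an x-bar and an italic s rather than as literal text)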
if (show.mean.val) { baseplot <- baseplot + # use annotation instead of geomtext, because we need mean value only printed once annotate( "text", x = mittelwert, y = upper_lim, parse = TRUE, label = paste( "italic(bar(x)) == ", round(mittelwert, 1), "~~italic(s) == ", round(stddev, 1) ), vjust = "top", hjust = "top" ) } # check whether the user wants to plot standard deviation area if (show.sd) { baseplot <- baseplot + # first draw shaded rectangle. these are by default in grey colour with very high transparancy annotate("rect", xmin = mittelwert - stddev, xmax = mittelwert + stddev, ymin = 0, ymax = c(upper_lim), fill = "grey70", alpha = 0.2) + # draw border-lines for shaded rectangle geom_vline(xintercept = mittelwert - stddev, linetype = 3, size = mean.line.size, alpha = 0.7) + geom_vline(xintercept = mittelwert + stddev, linetype = 3, size = mean.line.size, alpha = 0.7) } } # show absolute and percentage value of each bar. baseplot <- baseplot + yscale + # continuous x-scale for histograms scale_x_continuous(limits = xlim, expand = expand.grid, breaks = histgridbreaks) } # set axes text and baseplot <- baseplot + labs(title = title, x = axis.title, y = NULL) # Check whether ggplot object should be returned or plotted baseplot } sjPlot/R/plot_scatter.R0000644000176200001440000002515013563762623014574 0ustar liggesusers#' @title Plot (grouped) scatter plots #' @name plot_scatter #' #' @description Display scatter plot of two variables. Adding a grouping variable to #' the scatter plot is possible. Furthermore, fitted lines can be added #' for each group as well as for the overall plot. #' #' @param data A data frame, or a grouped data frame. #' @param x Name of the variable for the x-axis. #' @param y Name of the variable for the y-axis. #' @param grp Optional, name of the grouping-variable. If not missing, the #' scatter plot will be grouped. See 'Examples'. #' @param dot.labels Character vector with names for each coordinate pair given #' by \code{x} and \code{y}, so text labels are added to the plot. #' Must be of same length as \code{x} and \code{y}. #' If \code{dot.labels} has a different length, data points will be trimmed #' to match \code{dot.labels}. If \code{dot.labels = NULL} (default), #' no labels are printed. #' @param label.size Size of text labels if argument \code{dot.labels} is used. #' @param fit.line,fit.grps Specifies the method to add a fitted line accross #' the data points. Possible values are for instance \code{"lm"}, \code{"glm"}, #' \code{"loess"} or \code{"auto"}. If \code{NULL}, no line is plotted. #' \code{fit.line} adds a fitted line for the complete data, while \code{fit.grps} #' adds a fitted line for each subgroup of \code{grp}. #' @param emph.dots Logical, if \code{TRUE}, overlapping points at same coordinates #' will be becomme larger, so point size indicates amount of overlapping. #' @param show.rug Logical, if \code{TRUE}, a marginal rug plot is displayed #' in the graph. #' #' @return A ggplot-object. For grouped data frames, a list of ggplot-objects for #' each group in the data. 
#' #' @inheritParams plot_model #' @inheritParams plot_grpfrq #' #' @examples #' # load sample date #' library(sjmisc) #' library(sjlabelled) #' data(efc) #' #' # simple scatter plot #' plot_scatter(efc, e16sex, neg_c_7) #' #' # simple scatter plot, increased jittering #' plot_scatter(efc, e16sex, neg_c_7, jitter = .4) #' #' # grouped scatter plot #' plot_scatter(efc, c160age, e17age, e42dep) #' #' # grouped scatter plot with marginal rug plot #' # and add fitted line for complete data #' plot_scatter( #' efc, c12hour, c160age, c172code, #' show.rug = TRUE, fit.line = "lm" #' ) #' #' # grouped scatter plot with marginal rug plot #' # and add fitted line for each group #' plot_scatter( #' efc, c12hour, c160age, c172code, #' show.rug = TRUE, fit.grps = "loess", #' grid = TRUE #' ) #' #' @importFrom sjlabelled copy_labels #' @importFrom dplyr n_distinct #' @import ggplot2 #' @export plot_scatter <- function( data, x, y, grp, title = "", legend.title = NULL, legend.labels = NULL, dot.labels = NULL, axis.titles = NULL, dot.size = 1.5, label.size = 3, colors = "metro", fit.line = NULL, fit.grps = NULL, show.rug = FALSE, show.legend = TRUE, show.ci = FALSE, wrap.title = 50, wrap.legend.title = 20, wrap.legend.labels = 20, jitter = .05, emph.dots = FALSE, grid = FALSE ) { # check available packages if (!is.null(dot.labels) && !requireNamespace("ggrepel", quietly = TRUE)) { stop("Package `ggrepel` needed to plot labels. Please install it.", call. = FALSE) } # get data name.x <- deparse(substitute(x)) name.y <- deparse(substitute(y)) if (!missing(grp)) name.grp <- deparse(substitute(grp)) else name.grp <- NULL # optionally hide legend if not needed if (!is.null(name.grp) && grid && missing(show.legend)) show.legend <- FALSE pl <- NULL if (inherits(data, "grouped_df")) { # get grouped data grps <- get_grouped_data(data) # now plot everything for (i in seq_len(nrow(grps))) { # copy back labels to grouped data frame tmp <- sjlabelled::copy_labels(grps$data[[i]], data) # prepare argument list, including title tmp.title <- get_grouped_plottitle(data, grps, i, sep = "\n") # copy data x <- tmp[[name.x]] y <- tmp[[name.y]] if (!is.null(name.grp)) grp <- tmp[[name.grp]] else grp <- NULL # prepare color palette if (!is.null(grp)) collen <- dplyr::n_distinct(grp, na.rm = TRUE) else collen <- 1 colors <- col_check2(colors, collen) # plot plots <- scatter_helper( x, y, grp, title = tmp.title, legend.title, legend.labels, dot.labels, axis.titles, dot.size, label.size, colors, fit.line, fit.grps, show.rug, show.legend, show.ci, wrap.title, wrap.legend.title, wrap.legend.labels, jitter, emph.dots, grid, name.x, name.y, name.grp ) # add plots, check for NULL results pl <- c(pl, list(plots)) } } else { # copy data x <- data[[name.x]] y <- data[[name.y]] if (!is.null(name.grp)) grp <- data[[name.grp]] else grp <- NULL # prepare color palette if (!is.null(grp)) collen <- dplyr::n_distinct(grp, na.rm = TRUE) else collen <- 1 colors <- col_check2(colors, collen) # plot pl <- scatter_helper( x, y, grp, title, legend.title, legend.labels, dot.labels, axis.titles, dot.size, label.size, colors, fit.line, fit.grps, show.rug, show.legend, show.ci, wrap.title, wrap.legend.title, wrap.legend.labels, jitter, emph.dots, grid, name.x, name.y, name.grp ) } pl } #' @importFrom stats na.omit #' @importFrom sjlabelled get_labels get_label #' @importFrom sjmisc word_wrap scatter_helper <- function( x, y, grp, title, legend.title, legend.labels, dot.labels, axis.titles, dot.size, label.size, colors, fit.line, fit.grps, show.rug, 
show.legend, show.ci, wrap.title, wrap.legend.title, wrap.legend.labels, jitter, emph.dots, grid, name.x, name.y, name.grp ) { # any missing names? if (is.null(name.x) || name.x == "NULL") name.x <- "" if (is.null(name.y) || name.y == "NULL") name.y <- "" # copy titles if (is.null(axis.titles)) { axisTitle.x <- NULL axisTitle.y <- NULL } else { axisTitle.x <- axis.titles[1] if (length(axis.titles) > 1) axisTitle.y <- axis.titles[2] else axisTitle.y <- NULL } # try to automatically set labels is not passed as parameter if (is.null(legend.labels) && !is.null(grp)) { legend.labels <- sjlabelled::get_labels( grp, attr.only = F, values = NULL, non.labelled = T ) } if (is.null(legend.title) && !is.null(grp)) legend.title <- sjlabelled::get_label(grp, def.value = name.grp) if (is.null(axisTitle.x)) axisTitle.x <- sjlabelled::get_label(x, def.value = name.x) if (is.null(axisTitle.y)) axisTitle.y <- sjlabelled::get_label(y, def.value = name.y) if (is.null(title)) { t1 <- sjlabelled::get_label(x, def.value = name.x) t2 <- sjlabelled::get_label(y, def.value = name.y) if (!is.null(t1) && !is.null(t2)) { title <- paste0(t1, " by ", t2) if (!is.null(grp)) { t3 <- sjlabelled::get_label(grp, def.value = name.grp) if (!is.null(t3)) title <- paste0(title, " (grouped by ", t3, ")") } } } # remove titles if empty if (!is.null(legend.title) && legend.title == "") legend.title <- NULL if (!is.null(axisTitle.x) && axisTitle.x == "") axisTitle.x <- NULL if (!is.null(axisTitle.y) && axisTitle.y == "") axisTitle.y <- NULL if (!is.null(title) && title == "") title <- NULL # create data frame # check whether we have grouping variable if (is.null(grp)) { # if not, add a dummy grouping variable grp <- rep(1, length(x)) # we don't need legend here show.legend <- FALSE } # get value labels from attribute grl <- sjlabelled::get_labels(grp, attr.only = T) # simple data frame dat <- stats::na.omit(data.frame(x = x, y = y, grp = grp)) # group as factor dat$grp <- as.factor(dat$grp) # set labelled levels, for facets if (grid && !is.null(grl)) levels(dat$grp) <- grl # do we have point labels? if (!is.null(dot.labels)) { # check length if (length(dot.labels) > nrow(dat)) { # Tell user that we have too many point labels warning("More point labels than data points. Omitting remaining point labels", call. = F) # shorten vector dot.labels <- dot.labels[seq_len(nrow(dat))] } else if (length(dot.labels) < nrow(dat)) { # Tell user that we have too less point labels warning("Less point labels than data points. Omitting remaining data point", call. 
= F) # shorten data frame dat <- dat[seq_len(length(dot.labels)), ] } # append labels dat$dot.lab <- as.character(dot.labels) } # fix and wrap labels and titles if (is.null(legend.labels)) legend.labels <- as.character(sort(unique(dat$grp))) legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels) if (!is.null(legend.title)) legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title) if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title) if (!is.null(axisTitle.x)) axisTitle.x <- sjmisc::word_wrap(axisTitle.x, wrap.title) if (!is.null(axisTitle.y)) axisTitle.y <- sjmisc::word_wrap(axisTitle.y, wrap.title) # Plot scatter plot scp <- ggplot(dat, aes_string(x = "x", y = "y", colour = "grp")) # add marginal rug if (show.rug) { scp <- scp + geom_rug(position = position_jitter(width = jitter)) } # add data points if (emph.dots) { # indicate overlapping dots by point size scp <- scp + geom_count(show.legend = F, position = position_jitter(width = jitter)) } else { # else plot dots scp <- scp + geom_jitter(size = dot.size, position = position_jitter(width = jitter)) } # add labels if (!is.null(dot.labels)) { scp <- scp + ggrepel::geom_text_repel(aes_string(label = "dot.lab"), size = label.size) } # Show fitted lines if (!is.null(fit.grps)) { scp <- scp + stat_smooth(data = dat, aes_string(colour = "grp"), method = fit.grps, se = show.ci) } if (!is.null(fit.line)) { scp <- scp + stat_smooth(method = fit.line, se = show.ci, colour = "black") } # set font size for axes. scp <- scp + labs(title = title, x = axisTitle.x, y = axisTitle.y, colour = legend.title) # facet plot if (grid) scp <- scp + facet_wrap(~grp) sj.setGeomColors( scp, colors, length(legend.labels), show.legend, legend.labels ) } sjPlot/R/plot_gpt.R0000644000176200001440000002530013563567221013713 0ustar liggesusersutils::globalVariables("n") #' @title Plot grouped proportional tables #' @name plot_gpt #' #' @description Plot grouped proportional crosstables, where the proportion of #' each level of \code{x} for the highest category in \code{y} #' is plotted, for each subgroup of \code{grp}. #' #' @param x Categorical variable, where the proportion of each category in #' \code{x} for the highest category of \code{y} will be printed #' along the x-axis. #' @param y Categorical or numeric variable. If not a binary variable, \code{y} #' will be recoded into a binary variable, dichtomized at the highest #' category and all remaining categories. #' @param grp Grouping variable, which will define the y-axis #' @param shape.fill.color Optional color vector, fill-color for non-filled shapes #' @param shapes Numeric vector with shape styles, used to map the different #' categories of \code{x}. #' @param show.total Logical, if \code{TRUE}, a total summary line for all aggregated #' \code{grp} is added. #' @param annotate.total Logical, if \code{TRUE} and \code{show.total = TRUE}, #' the total-row in the figure will be highlighted with a slightly #' shaded background. #' @param axis.lim Numeric vector of length 2, defining the range of the plot axis. #' Depending on plot type, may effect either x- or y-axis, or both. #' For multiple plot outputs (e.g., from \code{type = "eff"} or #' \code{type = "slope"} in \code{\link{plot_model}}), \code{axis.lim} may #' also be a list of vectors of length 2, defining axis limits for each #' plot (only if non-faceted). #' @param show.p Logical, adds significance levels to values, or value and #' variable labels. #' #' @return A ggplot-object. 
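#'
#' @note Internally, \code{y} is dichotomized at its highest category (recoded
#'   via \code{"max=1;else=0"}), and the plotted proportion is the mean of this
#'   indicator for each level of \code{x} within each group of \code{grp}.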
#' #' @inheritParams plot_scatter #' @inheritParams plot_grpfrq #' @inheritParams plot_xtab #' #' @details The p-values are based on \code{\link[stats]{chisq.test}} of \code{x} #' and \code{y} for each \code{grp}. #' #' @examples #' data(efc) #' #' # the proportion of dependency levels in female #' # elderly, for each family carer's relationship #' # to elderly #' plot_gpt(efc, e42dep, e16sex, e15relat) #' #' # proportion of educational levels in highest #' # dependency category of elderly, for different #' # care levels #' plot_gpt(efc, c172code, e42dep, n4pstu) #' #' @import ggplot2 #' @importFrom dplyr group_by summarise bind_rows "%>%" n #' @importFrom scales percent #' @importFrom sjmisc to_factor rec #' @importFrom stats na.omit chisq.test #' @export plot_gpt <- function( data, x, y, grp, colors = "metro", geom.size = 2.5, shape.fill.color = "#f0f0f0", shapes = c(15, 16, 17, 18, 21, 22, 23, 24, 25, 7, 8, 9, 10, 12), title = NULL, axis.labels = NULL, axis.titles = NULL, legend.title = NULL, legend.labels = NULL, wrap.title = 50, wrap.labels = 15, wrap.legend.title = 20, wrap.legend.labels = 20, axis.lim = NULL, grid.breaks = NULL, show.total = TRUE, annotate.total = TRUE, show.p = TRUE, show.n = TRUE) { # get data name.x <- deparse(substitute(x)) name.y <- deparse(substitute(y)) name.grp <- deparse(substitute(grp)) pl <- NULL if (inherits(data, "grouped_df")) { # get grouped data grps <- get_grouped_data(data) # now plot everything for (i in seq_len(nrow(grps))) { # copy back labels to grouped data frame tmp <- sjlabelled::copy_labels(grps$data[[i]], data) # prepare argument list, including title tmp.title <- get_grouped_plottitle(data, grps, i, sep = "\n") # copy data x <- tmp[[name.x]] y <- tmp[[name.y]] grp <- tmp[[name.grp]] # plot plots <- gpt_helper( x, y, grp, colors, geom.size, shape.fill.color, shapes, title = tmp.title, axis.labels, axis.titles, legend.title, legend.labels, wrap.title, wrap.labels, wrap.legend.title, wrap.legend.labels, axis.lim, grid.breaks, show.total, annotate.total, show.p, show.n, name.x, name.y, name.grp ) # add plots, check for NULL results pl <- c(pl, list(plots)) } } else { # copy data x <- data[[name.x]] y <- data[[name.y]] grp <- data[[name.grp]] # plot pl <- gpt_helper( x, y, grp, colors, geom.size, shape.fill.color, shapes, title, axis.labels, axis.titles, legend.title, legend.labels, wrap.title, wrap.labels, wrap.legend.title, wrap.legend.labels, axis.lim, grid.breaks, show.total, annotate.total, show.p, show.n, name.x, name.y, name.grp ) } pl } gpt_helper <- function( x, y, grp, colors, geom.size, shape.fill.color, shapes, title, axis.labels, axis.titles, legend.title, legend.labels, wrap.title, wrap.labels, wrap.legend.title, wrap.legend.labels, axis.lim, grid.breaks, show.total, annotate.total, show.p, show.n, name.x, name.y, name.grp ) { # any missing names? 
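  # (Overview of this helper: variable names are normalized first, axis titles
  # and legend labels are then derived from the variables' label attributes,
  # `y` is dichotomized at its highest category, the proportion of that
  # category is computed per level of `x` within each group of `grp`, and a
  # chi-squared test of `x` against the dichotomized `y` is run per group to
  # annotate the axis labels with N and significance stars.)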
if (is.null(name.x) || name.x == "NULL") name.x <- "" if (is.null(name.y) || name.y == "NULL") name.y <- "" if (is.null(name.grp) || name.grp == "NULL") name.grp <- "" # copy titles if (is.null(axis.titles)) { axisTitle.x <- NULL axisTitle.y <- NULL } else { axisTitle.x <- axis.titles[1] if (length(axis.titles) > 1) axisTitle.y <- axis.titles[2] else axisTitle.y <- NULL } # try to automatically set labels if not passed as argument x <- suppressMessages(sjmisc::to_factor(x)) ylabels <- sjlabelled::get_labels( y, attr.only = F, values = NULL, non.labelled = T ) # get only value label for hightest category ylabels <- ylabels[length(ylabels)] if (is.null(axis.labels)) { axis.labels <- sjlabelled::get_labels( grp, attr.only = F, values = NULL, non.labelled = T ) } if (is.null(axisTitle.y)) { axisTitle.y <- paste0( "Proportion of ", sjlabelled::get_label(x, def.value = name.x), " in ", sjlabelled::get_label(y, def.value = name.y), " (", ylabels, ")" ) } if (is.null(legend.title)) { legend.title <- sjlabelled::get_label(x, def.value = name.x) } if (is.null(legend.labels)) { legend.labels <- sjlabelled::get_labels( x, attr.only = F, values = NULL, non.labelled = T ) } # set labels that are still missing, but which need values if (is.null(axis.labels)) axis.labels <- as.character(seq_len(length(grp))) # wrap titles and labels if (!is.null(legend.labels)) legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels) if (!is.null(legend.title)) legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title) if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title) if (!is.null(axisTitle.x)) axisTitle.x <- sjmisc::word_wrap(axisTitle.x, wrap.title) if (!is.null(axisTitle.y)) axisTitle.y <- sjmisc::word_wrap(axisTitle.y, wrap.title) if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # final data frame for plot newdf <- data.frame() group.p <- character() group.n <- character() # create data frame, for dplyr-chain mydf <- stats::na.omit(data.frame( grp = sjlabelled::as_numeric(grp, keep.labels = F), xpos = x, dep = sjlabelled::as_numeric(y, keep.labels = F) )) # recode dependent variable's categorues # max and all others, so we have proportion # between maximux value and rest mydf$dep <- sjmisc::rec(mydf$dep, rec = "max=1;else=0", append = FALSE) # group data by grouping variable, and inside # groups, group the x-variable newdf <- mydf %>% dplyr::group_by(.data$grp, .data$xpos) %>% dplyr::summarise(ypos = mean(.data$dep)) # group data by grouping variable, # and summarize N per group and chisq.test # of grp and x within each group pvals <- mydf %>% dplyr::group_by(.data$grp) %>% dplyr::summarise(N = dplyr::n(), p = suppressWarnings(stats::chisq.test(table(.data$xpos, .data$dep))$p.value)) # copy p values for (i in seq_len(length(pvals$grp))) group.p[i] <- get_p_stars(pvals$p[i]) # copy N for (i in seq_len(length(pvals$grp))) group.n[i] <- prettyNum(pvals$N[i], big.mark = ",", scientific = F) # if we want total line, repeat all for # complete data frame if (show.total) { tmp <- mydf %>% dplyr::group_by(.data$xpos) %>% dplyr::summarise(ypos = mean(.data$dep)) # pvalues and N pvals <- mydf %>% dplyr::summarise(N = dplyr::n(), p = suppressWarnings(stats::chisq.test(table(.data$xpos, .data$dep))$p.value)) # bind total row to final df newdf <- dplyr::bind_rows(newdf, tmp) # copy p values group.p <- c(group.p, get_p_stars(pvals$p)) # copy N group.n <- c(group.n, prettyNum(pvals$N, big.mark = ",", scientific = F)) # add "total" to axis labels axis.labels <- 
c(axis.labels, "Total") } # make group variables categorical newdf$grp <- suppressMessages(sjmisc::to_factor(newdf$grp)) newdf$xpos <- suppressMessages(sjmisc::to_factor(newdf$xpos)) # proportion needs to be numeric newdf$ypos <- sjlabelled::as_numeric(newdf$ypos, keep.labels = F) # add N and p-values to axis labels? if (show.n) axis.labels <- paste0(axis.labels, " (n=", group.n, ")") if (show.p) axis.labels <- paste0(axis.labels, " ", group.p) # Set up axis limits if (is.null(axis.lim)) axis.lim <- c(0, max(pretty(max(newdf$ypos, na.rm = TRUE), 10))) # Set up grid breaks if (is.null(grid.breaks)) gridbreaks <- waiver() else gridbreaks <- seq(axis.lim[1], axis.lim[2], by = grid.breaks) # Set up geom colors pal.len <- length(legend.labels) geom.colors <- col_check2(colors, pal.len) # Set up plot p <- ggplot(newdf, aes(x = rev(.data$grp), y = .data$ypos, colour = .data$xpos, shape = .data$xpos)) + geom_point(size = geom.size, fill = shape.fill.color) + scale_y_continuous(labels = scales::percent, breaks = gridbreaks, limits = axis.lim) + scale_x_discrete(labels = rev(axis.labels)) + scale_shape_manual(name = legend.title, labels = legend.labels, values = shapes[1:pal.len]) + scale_colour_manual(name = legend.title, labels = legend.labels, values = geom.colors) + labs(x = axisTitle.x, y = axisTitle.y, title = title) + coord_flip() # Annotate total line? if (show.total && annotate.total) p <- p + annotate("rect", xmin = 0.5, xmax = 1.5, ymin = -Inf, ymax = Inf, alpha = 0.15) p } sjPlot/R/plot_xtab.R0000644000176200001440000004743313644120366014065 0ustar liggesusers#' @title Plot contingency tables #' @name plot_xtab #' #' @description Plot proportional crosstables (contingency tables) of two variables as ggplot diagram. #' #' @param x A vector of values (variable) describing the bars which make up the plot. #' @param grp Grouping variable of same length as \code{x}, where \code{x} #' is grouped into the categories represented by \code{grp}. #' @param type Plot type. may be either \code{"bar"} (default) for bar charts, #' or \code{"line"} for line diagram. #' @param margin Indicates which data of the proportional table should be plotted. Use \code{"row"} for #' calculating row percentages, \code{"col"} for column percentages and \code{"cell"} for cell percentages. #' If \code{margin = "col"}, an additional bar with the total sum of each column #' can be added to the plot (see \code{show.total}). #' @param rev.order Logical, if \code{TRUE}, order of categories (groups) is reversed. #' @param dot.size Dot size, only applies, when argument \code{type = "line"}. #' @param string.total String for the legend label when a total-column is added. Only applies #' if \code{show.total = TRUE}. Default is \code{"Total"}. #' @param show.total When \code{margin = "col"}, an additional bar #' with the sum within each category and it's percentages will be added #' to each category. #' #' @inheritParams plot_grpfrq #' #' @return A ggplot-object. 
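#'
#' @details Percentages are computed from a contingency table of \code{x} and
#'   \code{grp}; if \code{weight.by} is supplied, the table is built from the
#'   weighted counts. The additional total bar (\code{show.total}) is only
#'   added when \code{margin = "col"}.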
#' #' @examples #' # create 4-category-items #' grp <- sample(1:4, 100, replace = TRUE) #' # create 3-category-items #' x <- sample(1:3, 100, replace = TRUE) #' #' # plot "cross tablulation" of x and grp #' plot_xtab(x, grp) #' #' # plot "cross tablulation" of x and y, including labels #' plot_xtab(x, grp, axis.labels = c("low", "mid", "high"), #' legend.labels = c("Grp 1", "Grp 2", "Grp 3", "Grp 4")) #' #' # plot "cross tablulation" of x and grp #' # as stacked proportional bars #' plot_xtab(x, grp, margin = "row", bar.pos = "stack", #' show.summary = TRUE, coord.flip = TRUE) #' #' # example with vertical labels #' library(sjmisc) #' library(sjlabelled) #' data(efc) #' set_theme(geom.label.angle = 90) #' plot_xtab(efc$e42dep, efc$e16sex, vjust = "center", hjust = "bottom") #' #' # grouped bars with EUROFAMCARE sample dataset #' # dataset was importet from an SPSS-file, #' # see ?sjmisc::read_spss #' data(efc) #' efc.val <- get_labels(efc) #' efc.var <- get_label(efc) #' #' plot_xtab(efc$e42dep, efc$e16sex, title = efc.var['e42dep'], #' axis.labels = efc.val[['e42dep']], legend.title = efc.var['e16sex'], #' legend.labels = efc.val[['e16sex']]) #' #' plot_xtab(efc$e16sex, efc$e42dep, title = efc.var['e16sex'], #' axis.labels = efc.val[['e16sex']], legend.title = efc.var['e42dep'], #' legend.labels = efc.val[['e42dep']]) #' #' # ------------------------------- #' # auto-detection of labels works here #' # so no need to specify labels. For #' # title-auto-detection, use NULL #' # ------------------------------- #' plot_xtab(efc$e16sex, efc$e42dep, title = NULL) #' #' plot_xtab(efc$e16sex, efc$e42dep, margin = "row", #' bar.pos = "stack", coord.flip = TRUE) #' #' @import ggplot2 #' @importFrom dplyr group_by mutate arrange filter select summarize #' @importFrom scales percent #' @importFrom stats na.omit #' @export plot_xtab <- function(x, grp, type = c("bar", "line"), margin = c("col", "cell", "row"), bar.pos = c("dodge", "stack"), title = "", title.wtd.suffix = NULL, axis.titles = NULL, axis.labels = NULL, legend.title = NULL, legend.labels = NULL, weight.by = NULL, rev.order = FALSE, show.values = TRUE, show.n = TRUE, show.prc = TRUE, show.total = TRUE, show.legend = TRUE, show.summary = FALSE, summary.pos = "r", drop.empty = TRUE, string.total = "Total", wrap.title = 50, wrap.labels = 15, wrap.legend.title = 20, wrap.legend.labels = 20, geom.size = 0.7, geom.spacing = 0.1, geom.colors = "Paired", dot.size = 3, smooth.lines = FALSE, grid.breaks = 0.2, expand.grid = FALSE, ylim = NULL, vjust = "bottom", hjust = "center", y.offset = NULL, coord.flip = FALSE) { # -------------------------------------------------------- # get variable name # -------------------------------------------------------- var.name.cnt <- get_var_name(deparse(substitute(x))) var.name.grp <- get_var_name(deparse(substitute(grp))) # -------------------------------------------------------- # match arguments # -------------------------------------------------------- bar.pos <- match.arg(bar.pos) type <- match.arg(type) margin <- match.arg(margin) # remove empty value-labels if (drop.empty) { x <- sjlabelled::drop_labels(x) grp <- sjlabelled::drop_labels(grp) } # -------------------------------------------------------- # copy titles # -------------------------------------------------------- if (is.null(axis.titles)) { axisTitle.x <- NULL axisTitle.y <- NULL } else { axisTitle.x <- axis.titles[1] if (length(axis.titles) > 1) axisTitle.y <- axis.titles[2] else axisTitle.y <- NULL } # 
-------------------------------------------------------- # grid-expansion # -------------------------------------------------------- if (expand.grid) { expand.grid <- waiver() } else { expand.grid <- c(0, 0) } # -------------------------------------------------------- # set text label offset # -------------------------------------------------------- if (is.null(y.offset)) { # stacked bars? if (bar.pos == "stack") { y_offset <- 0 } else { y.offset <- .005 if (coord.flip) { if (missing(vjust)) vjust <- "center" if (missing(hjust)) hjust <- "bottom" if (hjust == "bottom") y_offset <- y.offset else if (hjust == "top") y_offset <- -y.offset else y_offset <- 0 } else { if (vjust == "bottom") y_offset <- y.offset else if (vjust == "top") y_offset <- -y.offset else y_offset <- 0 } } } else { y_offset <- y.offset } # -------------------------------------------------------- # total column only applies to column percentages # -------------------------------------------------------- if (margin != "col") show.total <- FALSE # need to set this to FALSE if (!show.n && !show.prc) show.values <- F # -------------------------------------------------------- # create cross table of frequencies and percentages # -------------------------------------------------------- mydat <- create.xtab.df(x, grp, round.prz = 2, na.rm = T, weight.by = weight.by) # -------------------------------------------------------- # x-position as numeric factor, added later after # tidying # -------------------------------------------------------- bars.xpos <- seq_len(nrow(mydat$mydat)) # -------------------------------------------------------- # try to automatically set labels is not passed as argument # -------------------------------------------------------- if (is.null(axis.labels)) axis.labels <- mydat$labels.cnt if (is.null(legend.labels)) legend.labels <- mydat$labels.grp if (is.null(axisTitle.x)) axisTitle.x <- sjlabelled::get_label(x, def.value = var.name.cnt) if (is.null(legend.title)) legend.title <- sjlabelled::get_label(grp, def.value = var.name.grp) if (is.null(title)) { t1 <- sjlabelled::get_label(x, def.value = var.name.cnt) t2 <- sjlabelled::get_label(grp, def.value = var.name.grp) if (!is.null(t1) && !is.null(t2)) title <- paste0(t1, " by ", t2) } # -------------------------------------------------------- # remove titles if empty # -------------------------------------------------------- if (!is.null(legend.title) && legend.title == "") legend.title <- NULL if (!is.null(axisTitle.x) && axisTitle.x == "") axisTitle.x <- NULL if (!is.null(axisTitle.y) && axisTitle.y == "") axisTitle.y <- NULL if (!is.null(title) && title == "") title <- NULL # -------------------------------------------------------- # Check if user wants to add total column, and if so, # define amount of categories # -------------------------------------------------------- if (show.total) legend.labels <- c(legend.labels, string.total) grpcount <- length(legend.labels) # ----------------------------------------------- # check whether row, column or cell percentages are requested #--------------------------------------------------- if (margin == "cell") myptab <- mydat$proptab.cell else if (margin == "col") myptab <- mydat$proptab.col else if (margin == "row") myptab <- mydat$proptab.row myptab <- rownames_as_column(data.frame(myptab)) # ----------------------------------------------- # tidy data #--------------------------------------------------- mydf <- .gather(myptab, names_to = "group", values_to = "prc", columns = 2:(grpcount + 1)) mydf$group <- 
factor(mydf$group, levels = unique(mydf$group)) # ----------------------------------------------- # add total column and row to n-values #--------------------------------------------------- if (margin != "row") mydat$mydat$total <- unname(rowSums(mydat$mydat[, -1])) if (margin != "col") mydat$mydat <- rbind(mydat$mydat, c("total", unname(colSums(mydat$mydat[, -1])))) # ----------------------------------------------- # add n-values to tidy data frame #--------------------------------------------------- dummydf <- .gather(mydat$mydat, names_to = "group", values_to = "n", columns = 2:(grpcount + 1)) mydf$n <- as.numeric(dummydf$n) # ----------------------------------------------- # remove total for row and column index #--------------------------------------------------- if (margin != "col") mydf <- dplyr::filter(mydf, .data$rowname != "total") if (margin == "cell") mydf <- dplyr::select(mydf, -.data$total) # -------------------------------------------------------- # add xpos now # -------------------------------------------------------- mydf$xpos <- as.factor(as.numeric(bars.xpos)) # -------------------------------------------------------- # add half of Percentage values as new y-position for stacked bars # -------------------------------------------------------- mydf <- mydf %>% dplyr::group_by(.data$xpos) %>% dplyr::mutate(ypos = cumsum(.data$prc) - 0.5 * .data$prc) %>% dplyr::arrange(.data$group) # -------------------------------------------------------- # add line-break char # -------------------------------------------------------- if (show.prc && show.n) { mydf$line.break <- ifelse(isTRUE(coord.flip), ' ', '\n') } else { mydf$line.break <- "" } # -------------------------------------------------------- # define label position for dodged bars # -------------------------------------------------------- if (bar.pos == "dodge") mydf$ypos <- mydf$prc # -------------------------------------------------------- # finally, percentage values need to be between 0 and 1 # -------------------------------------------------------- mydf$prc <- mydf$prc / 100 mydf$ypos <- mydf$ypos / 100 # -------------------------------------------------------- # Prepare and trim legend labels to appropriate size # -------------------------------------------------------- if (!is.null(legend.labels)) legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels) if (!is.null(legend.title)) legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title) if (!is.null(title)) { # if we have weighted values, say that in diagram's title if (!is.null(title.wtd.suffix)) title <- paste(title, title.wtd.suffix, sep = "") title <- sjmisc::word_wrap(title, wrap.title) } if (!is.null(axisTitle.x)) axisTitle.x <- sjmisc::word_wrap(axisTitle.x, wrap.title) if (!is.null(axisTitle.y)) axisTitle.y <- sjmisc::word_wrap(axisTitle.y, wrap.title) if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # ---------------------------- # create expression with model summarys. 
used # for plotting in the diagram later # ---------------------------- if (show.summary) { modsum <- crosstabsum(x, grp, weight.by) } else { modsum <- NULL } # -------------------------------------------------------- # Prepare bar charts # -------------------------------------------------------- # calculate upper y-axis-range # if we have a fixed value, use this one here lower_lim <- 0 # calculate upper y-axis-range # if we have a fixed value, use this one here if (!is.null(ylim) && length(ylim) == 2) { lower_lim <- ylim[1] upper_lim <- ylim[2] } else if (bar.pos == "stack") { # check upper limits. we may have rounding errors, so values # sum up to more than 100% ul <- max(mydf %>% dplyr::group_by(.data$rowname) %>% dplyr::summarize(ges = sum(.data$prc)) %>% dplyr::select(.data$ges), na.rm = T) if (ul > 1L) upper_lim <- ul else upper_lim <- 1 } else { # factor depends on labels if (show.values) mlp <- 1.2 else mlp <- 1.1 # else calculate upper y-axis-range depending # on the amount of max. answers per category upper_lim <- max(mydf$prc) * mlp } # -------------------------------------------------------- # check if category-oder on x-axis should be reversed # change category label order then # -------------------------------------------------------- if (rev.order) { axis.labels <- rev(axis.labels) mydf$xpos <- rev(mydf$xpos) } # -------------------------------------------------------- # align dodged position of labels to bar positions # -------------------------------------------------------- posdodge <- ifelse(type == "line", 0, geom.size + geom.spacing) # -------------------------------------------------------- # Set value labels # -------------------------------------------------------- if (show.values) { # if we have dodged bars or dots, we have to use a slightly dodged position for labels # as well, sofor better reading if (bar.pos == "dodge") { if (show.prc && show.n) { ggvaluelabels <- geom_text(aes(y = .data$ypos + y_offset, label = sprintf("%.01f%%%s(n=%i)", 100 * .data$prc, .data$line.break, .data$n)), position = position_dodge(posdodge), vjust = vjust, hjust = hjust) } else if (show.prc) { ggvaluelabels <- geom_text(aes(y = .data$ypos + y_offset, label = sprintf("%.01f%%", 100 * .data$prc)), position = position_dodge(posdodge), vjust = vjust, hjust = hjust) } else if (show.n) { ggvaluelabels <- geom_text(aes(y = .data$ypos + y_offset, label = sprintf("n=%i", .data$n)), position = position_dodge(posdodge), vjust = vjust, hjust = hjust) } } else { if (show.prc && show.n) { ggvaluelabels <- geom_text(aes(y = .data$ypos, label = sprintf("%.01f%%%s(n=%i)", 100 * .data$prc, .data$line.break, .data$n)), vjust = vjust, hjust = hjust) } else if (show.prc) { ggvaluelabels <- geom_text(aes(y = .data$ypos, label = sprintf("%.01f%%", 100 * .data$prc)), vjust = vjust, hjust = hjust) } else if (show.n) { ggvaluelabels <- geom_text(aes(y = .data$ypos, label = sprintf("n=%i", .data$n)), vjust = vjust, hjust = hjust) } } } else { ggvaluelabels <- geom_text(aes_string(y = "ypos"), label = "") } # -------------------------------------------------------- # Set up grid breaks # -------------------------------------------------------- if (is.null(grid.breaks)) { gridbreaks <- waiver() } else { gridbreaks <- seq(lower_lim, upper_lim, by = grid.breaks) } # ---------------------------------- # construct final plot, base constructor # first, set x scale # ---------------------------------- if (type == "line") scalex <- scale_x_continuous(labels = axis.labels) else scalex <- scale_x_discrete(labels = 
axis.labels) # ---------------------------------- # check whether bars or lines should be printed # ---------------------------------- if (type == "bar") { if (bar.pos == "dodge") { geob <- geom_bar(stat = "identity", position = position_dodge(posdodge), width = geom.size) } else { geob <- geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = geom.size) } # check if we have lines } else if (type == "line") { # for lines, numeric scale mydf$xpos <- sjlabelled::as_numeric(mydf$xpos, keep.labels = F) line.stat <- ifelse(isTRUE(smooth.lines), "smooth", "identity") geob <- geom_line(aes_string(colour = "group"), size = geom.size, stat = line.stat) } # -------------------------------------------------------- # start plot here # -------------------------------------------------------- baseplot <- ggplot(mydf, aes_string(x = "xpos", y = "prc", fill = "group")) + geob # if we have line diagram, print lines here if (type == "line") { baseplot <- baseplot + geom_point(size = dot.size, shape = 21, show.legend = FALSE) } # ------------------------------------------ # check whether table summary should be printed # ------------------------------------------ baseplot <- print.table.summary(baseplot, modsum, summary.pos) baseplot <- baseplot + # show absolute and percentage value of each bar. ggvaluelabels + # no additional labels for the x- and y-axis, only diagram title labs(title = title, x = axisTitle.x, y = axisTitle.y, fill = legend.title) + # print value labels to the x-axis. # If argument "axis.labels" is NULL, the category numbers (1 to ...) # appear on the x-axis scalex + # set Y-axis, depending on the calculated upper y-range. # It either corresponds to the maximum amount of cases in the data set # (length of var) or to the highest count of var's categories. scale_y_continuous(breaks = gridbreaks, limits = c(lower_lim, upper_lim), expand = expand.grid, labels = scales::percent) # check whether coordinates should be flipped, i.e. # swap x and y axis if (coord.flip) baseplot <- baseplot + coord_flip() # --------------------------------------------------------- # set geom colors # --------------------------------------------------------- sj.setGeomColors( baseplot, geom.colors, length(legend.labels), show.legend, legend.labels ) } sjPlot/R/save_plot.R0000644000176200001440000000743513446531454014067 0ustar liggesusers#' @title Save ggplot-figure for print publication #' @name save_plot #' #' @description Convenient function to save the last ggplot-figure in #' high quality for publication. #' #' @param filename Name of the output file; filename must end with one #' of the following accepted file types: ".png", ".jpg", ".svg" or ".tif". #' @param fig The plot that should be saved. By default, the last plot is saved. #' @param width Width of the figure, in centimetres. #' @param height Height of the figure, in centimetres. #' @param dpi Resolution in dpi (dots per inch). Ignored for vector formats, such as ".svg". #' @param theme The default theme to use when saving the plot. #' @param label.color Color value for labels (axis, plot, etc.). #' @param label.size Fontsize of value labels inside plot area. #' @param axis.textsize Fontsize of axis labels. #' @param axis.titlesize Fontsize of axis titles. #' @param legend.textsize Fontsize of legend labels. #' @param legend.titlesize Fontsize of legend title. #' @param legend.itemsize Size of legend's item (legend key), in centimetres. 
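#'
#' @examples
#' \dontrun{
#' # A minimal usage sketch; the plot and file name below are illustrative only
#' library(ggplot2)
#' p <- ggplot(mtcars, aes(x = mpg, y = hp)) + geom_point()
#' save_plot("my-figure.png", fig = p, width = 12, height = 9, dpi = 300)}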
#' #' @note This is a convenient function with some default settings that should #' come close to most of the needs for fontsize and scaling in figures #' when saving them for printing or publishing. It uses cairographics #' anti-aliasing (see \code{\link[grDevices]{png}}). #' \cr \cr #' For adjusting plot appearance, see also \code{\link{sjPlot-themes}}. #' #' @import ggplot2 #' @importFrom grDevices png jpeg tiff dev.off cm svg #' @export save_plot <- function(filename, fig = last_plot(), width = 12, height = 9, dpi = 300, theme = theme_get(), label.color = "black", label.size = 2.4, axis.textsize = .8, axis.titlesize = .75, legend.textsize = .6, legend.titlesize = .65, legend.itemsize = .5) { # get file extension ext <- tolower(substring(filename, regexpr("\\.[^\\.]*$", filename) + 1, nchar(filename))) # valid file ytpe? if (!ext %in% c("png", "jpg", "tif", "svg")) stop("filetype must be one of `.png`, `.jpg`, '.svg' or `.tif`.", call. = F) # set printable theme, adjust font sizes. # this is the most critical point... set_theme( base = theme, geom.label.color = label.color, axis.title.color = label.color, axis.textcolor = label.color, legend.title.color = label.color, legend.color = label.color, geom.label.size = label.size, axis.textsize = axis.textsize, axis.title.size = axis.titlesize, legend.size = legend.textsize, legend.title.size = legend.titlesize, legend.item.size = legend.itemsize ) # prapare save if (ext == "png") grDevices::png( filename = filename, width = width, height = height, units = "cm", res = dpi, type = "cairo" ) else if (ext == "jpg") grDevices::jpeg( filename = filename, width = width, height = height, units = "cm", res = dpi, type = "cairo" ) else if (ext == "tif") grDevices::tiff( filename = filename, width = width, height = height, units = "cm", res = dpi, type = "cairo" ) else if (ext == 'svg') grDevices::svg( filename = filename, width = width / grDevices::cm(1), height = height / grDevices::cm(1) ) # print plot to device graphics::plot(fig) # close device grDevices::dev.off() } sjPlot/R/view_df.R0000644000176200001440000005055113611404371013502 0ustar liggesusers#' @title View structure of labelled data frames #' @name view_df #' #' @description Save (or show) content of an imported SPSS, SAS or Stata data file, #' or any similar labelled \code{data.frame}, as HTML table. #' This quick overview shows variable ID number, name, label, #' type and associated value labels. The result can be #' considered as "codeplan" of the data frame. #' #' @param x A (labelled) data frame, imported by \code{\link[sjlabelled]{read_spss}}, #' \code{\link[sjlabelled]{read_sas}} or \code{\link[sjlabelled]{read_stata}} function, #' or any similar labelled data frame (see \code{\link[sjlabelled]{set_label}} #' and \code{\link[sjlabelled]{set_labels}}). #' @param weight.by Name of variable in \code{x} that indicated the vector of #' weights that will be applied to weight all observations. Default is #' \code{NULL}, so no weights are used. #' @param show.id Logical, if \code{TRUE} (default), the variable ID is shown in #' the first column. #' @param show.values Logical, if \code{TRUE} (default), the variable values #' are shown as additional column. #' @param show.string.values Logical, if \code{TRUE}, elements of character vectors #' are also shown. By default, these are omitted due to possibly overlengthy #' tables. #' @param show.labels Logical, if \code{TRUE} (default), the value labels are #' shown as additional column. 
#' @param show.frq Logical, if \code{TRUE}, an additional column with #' frequencies for each variable is shown. #' @param show.prc Logical, if \code{TRUE}, an additional column with percentage #' of frequencies for each variable is shown. #' @param show.wtd.frq Logical, if \code{TRUE}, an additional column with weighted #' frequencies for each variable is shown. Weights strem from \code{weight.by}. #' @param show.wtd.prc Logical, if \code{TRUE}, an additional column with weighted #' percentage of frequencies for each variable is shown. Weights strem from #' \code{weight.by}. #' @param sort.by.name Logical, if \code{TRUE}, rows are sorted according to the #' variable names. By default, rows (variables) are ordered according to their #' order in the data frame. #' @param max.len Numeric, indicates how many values and value labels per variable #' are shown. Useful for variables with many different values, where the output #' can be truncated. #' @param verbose Logical, if \code{TRUE}, a progress bar is displayed #' while creating the output. #' #' @inheritParams tab_df #' @inheritParams tab_model #' @inheritParams tab_xtab #' @inheritParams plot_grpfrq #' #' @return Invisibly returns #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the web page content (\code{page.content}), #' \item the complete html-output (\code{page.complete}) and #' \item the html-table with inline-css for use with knitr (\code{knitr}) #' } #' for further use. #' #' @examples #' \dontrun{ #' # init dataset #' data(efc) #' #' # view variables #' view_df(efc) #' #' # view variables w/o values and value labels #' view_df(efc, show.values = FALSE, show.labels = FALSE) #' #' # view variables including variable typed, orderd by name #' view_df(efc, sort.by.name = TRUE, show.type = TRUE) #' #' # User defined style sheet #' view_df(efc, #' CSS = list(css.table = "border: 2px solid;", #' css.tdata = "border: 1px solid;", #' css.arc = "color:blue;"))} #' #' @importFrom utils txtProgressBar setTxtProgressBar #' @importFrom sjmisc is_even var_type is_float #' @importFrom sjlabelled get_values drop_labels #' @importFrom purrr map_lgl #' @importFrom rlang quo_name enquo #' @export view_df <- function(x, weight.by = NULL, alternate.rows = TRUE, show.id = TRUE, show.type = FALSE, show.values = TRUE, show.string.values = FALSE, show.labels = TRUE, show.frq = FALSE, show.prc = FALSE, show.wtd.frq = FALSE, show.wtd.prc = FALSE, show.na = FALSE, max.len = 15, sort.by.name = FALSE, wrap.labels = 50, verbose = FALSE, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE) { # check encoding encoding <- get.encoding(encoding, x) # make data frame of single variable, so we have # unique handling for the data if (!is.data.frame(x)) stop("`x` needs to be a data frame!", call. = FALSE) # save name of object dfname <- deparse(substitute(x)) if (!missing(weight.by)) { weights <- rlang::quo_name(rlang::enquo(weight.by)) w.string <- tryCatch( { eval(weight.by) }, error = function(x) { NULL }, warning = function(x) { NULL }, finally = function(x) { NULL } ) if (!is.null(w.string) && is.character(w.string)) weights <- w.string if (sjmisc::is_empty(weights) || weights == "NULL") weights <- NULL } else weights <- NULL # variables with all missings? all.na <- purrr::map_lgl(x, ~ all(is.na(.x))) id <- seq_len(ncol(x)) cnames <- colnames(x) # do we have any "all-missing-variables"? 
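  # (if so, list them in a message and drop them from the output, since there
  #  is nothing to describe for them)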
if (any(all.na)) { rem.col <- seq_len(ncol(x))[all.na] message(sprintf("Following %i variables have only missing values and are not shown:", sum(all.na))) cat(paste(sprintf("%s [%i]", cnames[all.na], rem.col), collapse = ", ")) cat("\n") id <- id[!all.na] cnames <- cnames[!all.na] } # retrieve value and variable labels df.var <- sjlabelled::get_label(x) df.val <- sjlabelled::get_labels(x) # Order data set if requested if (sort.by.name) id <- id[order(cnames)] # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value tag.table <- "table" tag.thead <- "thead" tag.tdata <- "tdata" tag.arc <- "arc" tag.caption <- "caption" tag.omit <- "omit" css.table <- "border-collapse:collapse; border:none;" css.thead <- "border-bottom:double; font-style:italic; font-weight:normal; padding:0.2cm; text-align:left; vertical-align:top;" css.tdata <- "padding:0.2cm; text-align:left; vertical-align:top;" css.arc <- "background-color:#eeeeee" css.caption <- "font-weight: bold; text-align:left;" css.omit <- "color:#999999;" # check user defined style sheets if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.table']], 2)), CSS[['css.table']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']], 1, 1) == '+', paste0(css.thead, substring(CSS[['css.thead']], 2)), CSS[['css.thead']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']], 1, 1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']], 2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.arc']])) css.arc <- ifelse(substring(CSS[['css.arc']], 1, 1) == '+', paste0(css.arc, substring(CSS[['css.arc']], 2)), CSS[['css.arc']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']], 1, 1) == '+', paste0(css.caption, substring(CSS[['css.caption']], 2)), CSS[['css.caption']]) if (!is.null(CSS[['css.omit']])) css.omit <- ifelse(substring(CSS[['css.omit']], 1, 1) == '+', paste0(css.omit, substring(CSS[['css.omit']], 2)), CSS[['css.omit']]) } # set style sheet page.style <- sprintf("", tag.table, css.table, tag.thead, css.thead, tag.tdata, css.tdata, tag.arc, css.arc, tag.caption, css.caption, tag.omit, css.omit) # table init toWrite <- sprintf("\n\n\n%s\n\n\n", encoding, page.style) # table caption, data frame name page.content <- sprintf("\n \n", dfname) # header row page.content <- paste0(page.content, " \n ") if (show.id) page.content <- paste0(page.content, "") page.content <- paste0(page.content, "") if (show.type) page.content <- paste0(page.content, "") page.content <- paste0(page.content, "") if (show.na) page.content <- paste0(page.content, "") if (show.values) page.content <- paste0(page.content, "") if (show.labels) page.content <- paste0(page.content, "") if (show.frq) page.content <- paste0(page.content, "") if (show.prc) page.content <- paste0(page.content, "") if (show.wtd.frq) page.content <- paste0(page.content, "") if (show.wtd.prc) page.content <- paste0(page.content, "") page.content <- paste0(page.content, "\n \n") # create progress bar if (verbose) pb <- utils::txtProgressBar(min = 0, max = length(id), style = 3) # subsequent rows for (ccnt in 1:length(id)) { # get index number, depending on sorting index <- id[ccnt] # default row string arcstring <- "" # if we have alternating row colors, set css if (alternate.rows) arcstring <- ifelse(sjmisc::is_even(ccnt), " arc", "") page.content 
<- paste0(page.content, " \n") # ID if (show.id) page.content <- paste0(page.content, sprintf(" \n", arcstring, index)) # name, and note if (!is.list(x[[index]]) && !is.null(comment(x[[index]]))) td.title.tag <- sprintf(" title=\"%s\"", comment(x[[index]])) else td.title.tag <- "" page.content <- paste0( page.content, sprintf( " \n", arcstring, td.title.tag, colnames(x)[index] ) ) # type if (show.type) { vartype <- sjmisc::var_type(x[[index]]) page.content <- paste0(page.content, sprintf(" \n", arcstring, vartype)) } # label if (index <= length(df.var)) { varlab <- df.var[index] if (!is.null(wrap.labels)) { # wrap long variable labels varlab <- sjmisc::word_wrap(varlab, wrap.labels, "
") } } else { varlab <- "" } page.content <- paste0(page.content, sprintf(" \n", arcstring, varlab)) # missings and missing percentage if (show.na) { if (is.list(x[[index]])) { page.content <- paste0( page.content, sprintf(" \n", arcstring) ) } else { page.content <- paste0( page.content, sprintf( " \n", arcstring, sum(is.na(x[[index]]), na.rm = T), 100 * sum(is.na(x[[index]]), na.rm = T) / nrow(x) ) ) } } # if value labels are shown, and we have numeric, non-labelled vectors, # show range istead of value labels here if (is.numeric(x[[index]]) && !has_value_labels(x[[index]])) { if (show.values || show.labels) { if (sjmisc::is_float(x[[index]])) valstring <- paste0(sprintf("%.1f", range(x[[index]], na.rm = T)), collapse = "-") else valstring <- paste0(sprintf("%i", as.integer(range(x[[index]], na.rm = T))), collapse = "-") if (show.values && show.labels) { colsp <- " colspan=\"2\"" valstring <- paste0("range: ", valstring, "") } else { colsp <- "" } page.content <- paste0(page.content, sprintf(" \n", arcstring, colsp, valstring)) } } else { # values if (show.values) { valstring <- "" # do we have valid index? if (index <= ncol(x)) { if (is.list(x[[index]])) { valstring <- "<list>" } else { # if yes, get variable values vals <- sjlabelled::get_values(x[[index]]) # check if we have any values... if (!is.null(vals)) { # if we have values, put all values into a string loop <- stats::na.omit(seq_len(length(vals))[1:max.len]) for (i in loop) { valstring <- paste0(valstring, vals[i]) if (i < length(vals)) valstring <- paste0(valstring, "
") } if (max.len < length(vals)) valstring <- paste0(valstring, "<...>") } } } else { valstring <- "" } page.content <- paste0(page.content, sprintf(" \n", arcstring, valstring)) } # value labels if (show.labels) { valstring <- "" # do we have valid index? if (index <= length(df.val)) { if (is.list(x[[index]])) { valstring <- "<list>" } else { # if yes, get value labels # the code here corresponds to the above code # for variable values vals <- df.val[[index]] if (!is.null(vals)) vals <- stats::na.omit(vals) # sort character vectors if (is.character(x[[index]]) && !is.null(vals) && !sjmisc::is_empty(vals)) { if (show.string.values) vals <- sort(vals) else vals <- "<output omitted>" } # check if we have any values... if (!is.null(vals) && !sjmisc::is_empty(vals)) { if (is.character(x[[index]]) && !show.string.values) { valstring <- "<output omitted>" } else { # if yes, add all to a string loop <- stats::na.omit(seq_len(length(vals))[1:max.len]) for (i in loop) { valstring <- paste0(valstring, vals[i]) if (i < length(vals)) valstring <- paste0(valstring, "
") } if (max.len < length(vals)) valstring <- paste0(valstring, "<... truncated>") } } } } else { valstring <- "" } page.content <- paste0(page.content, sprintf(" \n", arcstring, valstring)) } } # frequencies if (show.frq) { if (is.list(x[[index]])) valstring <- "<list>" else if (is.character(x[[index]]) && !show.string.values) valstring <- "<output omitted>" else valstring <- frq.value(index, x, df.val, max.len = max.len) page.content <- paste0(page.content, sprintf( " \n", arcstring, valstring )) } # percentage of frequencies if (show.prc) { if (is.list(x[[index]])) valstring <- "<list>" else if (is.character(x[[index]]) && !show.string.values) valstring <- "<output omitted>" else valstring <- frq.value(index, x, df.val, as.prc = TRUE, max.len = max.len) page.content <- paste0(page.content, sprintf( " \n", arcstring, valstring )) } # frequencies if (show.wtd.frq && !is.null(weights)) { if (is.list(x[[index]])) valstring <- "<list>" else if (is.character(x[[index]]) && !show.string.values) valstring <- "<output omitted>" else valstring <- frq.value(index, x, df.val, weights, max.len = max.len) page.content <- paste0(page.content, sprintf( " \n", arcstring, valstring )) } # percentage of frequencies if (show.wtd.prc && !is.null(weights)) { if (is.list(x[[index]])) valstring <- "<list>" else if (is.character(x[[index]]) && !show.string.values) valstring <- "<output omitted>" else valstring <- frq.value(index, x, df.val, weights, as.prc = TRUE, max.len = max.len) page.content <- paste0(page.content, sprintf( " \n", arcstring, valstring )) } # update progress bar if (verbose) utils::setTxtProgressBar(pb, ccnt) # close row tag page.content <- paste0(page.content, " \n") } if (verbose) close(pb) # finish html page page.content <- paste(page.content, "
Data frame: %s
IDNameTypeLabelmissingsValuesValue LabelsFreq.%weighted Freq.weighted %
%i%s%s%s<list>%i (%.2f%%)%s%s%s%s%s%s%s
", sep = "\n") toWrite <- paste0(toWrite, sprintf("%s\n", page.content)) # replace class attributes with inline style, # useful for knitr knitr <- page.content # set style attributes for main table tags knitr <- gsub("class=", "style=", knitr, fixed = TRUE, useBytes = TRUE) knitr <- gsub("" } else { if (as.prc) frqs <- sprintf("%.2f", frqs$valid.prc) else frqs <- as.character(frqs$frq) if (length(frqs) > min(c(length(df.val[[index]]), max.len))) { frqs <- frqs[1:min(c(length(df.val[[index]]), max.len))] } valstring <- paste(frqs, collapse = "
") } } valstring } sjPlot/R/html_print_utils.R0000644000176200001440000005607514053120675015471 0ustar liggesusers#' @importFrom sjmisc is_empty str_start check_css_param <- function(CSS) { if (sjmisc::is_empty(CSS)) return(CSS) n <- names(CSS) nocss <- unlist(lapply(sjmisc::str_start(x = n, pattern = "css."), sjmisc::is_empty)) if (any(nocss)) { n[nocss] <- paste0("css.", n[nocss]) names(CSS) <- n } CSS } # This functions creates the body of the HTML page, i.e. it puts # the content of a data frame into a HTML table that is returned. #' @importFrom sjmisc is_empty var_type is_even trim tab_df_content <- function( mydf, title, footnote, col.header, show.type, show.rownames, show.footnote, altr.row.col, sort.column, include.table.tag = TRUE, no.last.table.row = FALSE, show.header = TRUE, zeroinf = FALSE, rnames = NULL, ...) { # save no of rows and columns rowcnt <- nrow(mydf) colcnt <- ncol(mydf) # check if data frame has CSS-attribute. must be a 2x2 matrix with same # dimension as data frame. CSS attributes are than mapped for each # value in the data frame. own.css <- attr(mydf, "CSS", exact = TRUE) if (!identical(dim(own.css), dim(mydf))) own.css <- NULL # check sorting if (!is.null(sort.column)) { sc <- abs(sort.column) if (sc < 1 || sc > colcnt) message("Column index in `sort.column` for sorting columns out of bounds. No sorting applied.") else { rows <- order(mydf[[sc]]) if (sort.column < 0) rows <- rev(rows) mydf <- mydf[rows, ] } } cnames <- colnames(mydf) # if user supplied own column header, which also has the same length # as no. columns, replace column names with user header if (!sjmisc::is_empty(col.header) && length(col.header) == length(cnames)) cnames <- col.header # check if rownames should be shown and data has any rownames at all # if so, we need to update our information on column names if (show.rownames && !is.null(rnames)) { mydf <- rownames_as_column(mydf, rownames = rnames) colcnt <- colcnt + 1 cnames <- c("Row", cnames) } # start table tag if (include.table.tag) page.content <- "\n" else page.content <- "" # table caption, variable label if (!sjmisc::is_empty(title)) page.content <- paste0(page.content, sprintf(" \n", title)) # header row ---- if (isTRUE(show.header)) { page.content <- paste0(page.content, " \n") for (i in 1:colcnt) { # separate CSS for first column ftc <- dplyr::if_else(i == 1, " firsttablecol", "", "") oc <- ifelse(is.null(own.css), "", sprintf(" %s", sjmisc::trim(own.css[1, i]))) # column names and variable type as table headline vartype <- sjmisc::var_type(mydf[[i]]) page.content <- paste0( page.content, sprintf(" \n") } page.content <- paste0(page.content, " \n") } if (isTRUE(zeroinf)) { page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" \n", colcnt + 1)) page.content <- paste0(page.content, " \n") } # subsequent rows ---- for (rcnt in 1:rowcnt) { # if we have alternating row colors, set css arcstring <- "" if (altr.row.col) arcstring <- ifelse(sjmisc::is_even(rcnt), " arc", "") ltr <- dplyr::if_else(rcnt == rowcnt & !isTRUE(no.last.table.row), " lasttablerow", "", "") page.content <- paste0(page.content, " \n") # all columns of a row for (ccnt in 1:colcnt) { # separate CSS for first column ftc <- dplyr::if_else(ccnt == 1, " firsttablecol", " centeralign", "") oc <- ifelse(is.null(own.css), "", sprintf(" %s", sjmisc::trim(own.css[rcnt, ccnt]))) # for regression models, column name ends with "_". 
use this # as css-class, so we may modify specific model columns model.column <- gsub("(.*)(\\_.*)(?=[0-9]$)", "\\3", colnames(mydf)[ccnt], perl = TRUE) mcn <- suppressWarnings(as.numeric(model.column)) if (nchar(model.column) == 1 && !is.na(mcn)) mcc <- sprintf(" modelcolumn%i", as.integer(mcn)) else mcc <- "" page.content <- paste0(page.content, sprintf( " \n", ftc, oc, ltr, arcstring, mcc, ccnt, mydf[[ccnt]][rcnt]) ) } page.content <- paste0(page.content, "\n") } # add optional "footnote" row ---- if (show.footnote) { page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" \n", colcnt + 1, footnote)) page.content <- paste0(page.content, "\n") } # finish html page ---- if (include.table.tag) page.content <- paste0(page.content, "
%s
%s", ftc, oc, i, cnames[i]) ) if (show.type) page.content <- paste0(page.content, sprintf("
(%s)", vartype)) page.content <- paste0(page.content, "
Count Model
%s
%s
\n") page.content } rmspc <- function(html.table) { cleaned <- gsub(" <", "<", html.table, fixed = TRUE, useBytes = TRUE) cleaned <- gsub(" <", "<", cleaned, fixed = TRUE, useBytes = TRUE) cleaned <- gsub(" <", "<", cleaned, fixed = TRUE, useBytes = TRUE) cleaned } # This function creates the CSS style sheet for HTML-output tab_df_style <- function(CSS = NULL, ...) { tab_df_prepare_style(CSS = CSS, content = NULL, task = 1, ...) } # This function creates the CSS style sheet for HTML-output, but # converts the style-definition into inline-CSS, which is required # for knitr documents, i.e. when HTML tables should be included in # knitr documents. tab_df_knitr <- function(CSS = NULL, content = NULL, ...) { tab_df_prepare_style(CSS = CSS, content = content, task = 2, ...) } # This functions creates the complete HTML page, include head and meta # section of the final HTML page. Required for display in the browser. tab_create_page <- function(style, content, encoding = "UTF-8") { if (is.null(encoding)) encoding <- "UTF-8" # first, save table header sprintf( "\n\n\n%s\n\n\n%s\n", encoding, style, content ) } # This function does the actual preparation and transformation of # the HTML style sheets, used by \code{tab_df_style()} and # \code{tab_df_knitr()} tab_df_prepare_style <- function(CSS = NULL, content = NULL, task, ...) { # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value tag.table <- "table" tag.td <- "td" tag.caption <- "caption" tag.thead <- "thead" tag.tdata <- "tdata" tag.arc <- "arc" tag.footnote <- "footnote" tag.subtitle <- "subtitle" tag.firsttablerow <- "firsttablerow" tag.lasttablerow <- "lasttablerow" tag.firsttablecol <- "firsttablecol" tag.leftalign <- "leftalign" tag.centeralign <- "centeralign" tag.summary <- "summary" tag.summarydata <- "summarydata" tag.fixedparts <- "fixedparts" tag.randomparts <- "randomparts" tag.zeroparts <- "zeroparts" tag.simplexparts <- "simplexparts" tag.firstsumrow <- "firstsumrow" tag.labelcellborder <- "labelcellborder" tag.depvarhead <- "depvarhead" tag.depvarheadnodv <- "depvarheadnodv" tag.col1 <- "col1" tag.col2 <- "col2" tag.col3 <- "col3" tag.col4 <- "col4" tag.col5 <- "col5" tag.col6 <- "col6" tag.modelcolumn1 <- "modelcolumn1" tag.modelcolumn2 <- "modelcolumn2" tag.modelcolumn3 <- "modelcolumn3" tag.modelcolumn4 <- "modelcolumn4" tag.modelcolumn5 <- "modelcolumn5" tag.modelcolumn6 <- "modelcolumn6" tag.modelcolumn7 <- "modelcolumn7" css.table <- "border-collapse:collapse; border:none;" css.td <- "" css.caption <- "font-weight: bold; text-align:left;" css.thead <- "border-top: double; text-align:center; font-style:italic; font-weight:normal; padding:0.2cm;" css.tdata <- "padding:0.2cm; text-align:left; vertical-align:top;" css.arc <- "background-color:#f2f2f2;" css.lasttablerow <- "border-bottom: double;" css.firsttablerow <- "border-bottom:1px solid black;" css.firsttablecol <- "text-align:left;" css.leftalign <- "text-align:left;" css.centeralign <- "text-align:center;" css.footnote <- "font-style:italic; border-top:double black; text-align:right;" css.subtitle <- "font-weight: normal;" css.summary <- "padding-top:0.1cm; padding-bottom:0.1cm;" css.summarydata <- "text-align:center;" css.fixedparts <- "font-weight:bold; text-align:left;" css.randomparts <- "font-weight:bold; text-align:left; padding-top:.8em;" css.zeroparts <- "font-weight:bold; text-align:left; padding-top:.8em;" css.simplexparts <- "font-weight:bold; text-align:left; padding-top:.8em;" 
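  # Note: each of these defaults can be replaced through the `CSS` argument;
  # prefixing a user-supplied value with "+" appends it to the default instead
  # of replacing it, e.g. CSS = list(css.table = "+background-color:#f0f0f0;").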
css.firstsumrow <- "border-top:1px solid;" css.labelcellborder <- "border-bottom:1px solid;" css.depvarhead <- "text-align:center; border-bottom:1px solid; font-style:italic; font-weight:normal;" css.depvarheadnodv <- "border-top: double; text-align:center; border-bottom:1px solid; font-style:italic; font-weight:normal;" css.col1 <- "" css.col2 <- "" css.col3 <- "" css.col4 <- "" css.col5 <- "" css.col6 <- "" css.modelcolumn1 <- "" css.modelcolumn2 <- "" css.modelcolumn3 <- "" css.modelcolumn4 <- "" css.modelcolumn5 <- "" css.modelcolumn6 <- "" css.modelcolumn7 <- "" # check user defined style sheets if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.table']], 2)), CSS[['css.table']]) if (!is.null(CSS[['css.td']])) css.td <- ifelse(substring(CSS[['css.td']], 1, 1) == '+', paste0(css.td, substring(CSS[['css.td']], 2)), CSS[['css.td']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']], 1, 1) == '+', paste0(css.caption, substring(CSS[['css.caption']], 2)), CSS[['css.caption']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']], 1, 1) == '+', paste0(css.thead, substring(CSS[['css.thead']], 2)), CSS[['css.thead']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']], 1, 1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']], 2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.arc']])) css.arc <- ifelse(substring(CSS[['css.arc']], 1, 1) == '+', paste0(css.arc, substring(CSS[['css.arc']], 2)), CSS[['css.arc']]) if (!is.null(CSS[['css.lasttablerow']])) css.lasttablerow <- ifelse(substring(CSS[['css.lasttablerow']], 1, 1) == '+', paste0(css.lasttablerow, substring(CSS[['css.lasttablerow']], 2)), CSS[['css.lasttablerow']]) if (!is.null(CSS[['css.firsttablerow']])) css.firsttablerow <- ifelse(substring(CSS[['css.firsttablerow']], 1, 1) == '+', paste0(css.firsttablerow, substring(CSS[['css.firsttablerow']], 2)), CSS[['css.firsttablerow']]) if (!is.null(CSS[['css.leftalign']])) css.leftalign <- ifelse(substring(CSS[['css.leftalign']], 1, 1) == '+', paste0(css.leftalign, substring(CSS[['css.leftalign']], 2)), CSS[['css.leftalign']]) if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']], 1, 1) == '+', paste0(css.centeralign, substring(CSS[['css.centeralign']], 2)), CSS[['css.centeralign']]) if (!is.null(CSS[['css.firsttablecol']])) css.firsttablecol <- ifelse(substring(CSS[['css.firsttablecol']], 1, 1) == '+', paste0(css.firsttablecol, substring(CSS[['css.firsttablecol']], 2)), CSS[['css.firsttablecol']]) if (!is.null(CSS[['css.footnote']])) css.footnote <- ifelse(substring(CSS[['css.footnote']], 1, 1) == '+', paste0(css.footnote, substring(CSS[['css.footnote']], 2)), CSS[['css.footnote']]) if (!is.null(CSS[['css.subtitle']])) css.subtitle <- ifelse(substring(CSS[['css.subtitle']], 1, 1) == '+', paste0(css.subtitle, substring(CSS[['css.subtitle']], 2)), CSS[['css.subtitle']]) if (!is.null(CSS[['css.col1']])) css.col1 <- ifelse(substring(CSS[['css.col1']], 1, 1) == '+', paste0(css.col1, substring(CSS[['css.col1']], 2)), CSS[['css.col1']]) if (!is.null(CSS[['css.col2']])) css.col2 <- ifelse(substring(CSS[['css.col2']], 1, 1) == '+', paste0(css.col2, substring(CSS[['css.col2']], 2)), CSS[['css.col2']]) if (!is.null(CSS[['css.col3']])) css.col3 <- ifelse(substring(CSS[['css.col3']], 1, 1) == '+', paste0(css.col3, substring(CSS[['css.col3']], 2)), 
CSS[['css.col3']]) if (!is.null(CSS[['css.col4']])) css.col4 <- ifelse(substring(CSS[['css.col4']], 1, 1) == '+', paste0(css.col4, substring(CSS[['css.col4']], 2)), CSS[['css.col4']]) if (!is.null(CSS[['css.col5']])) css.col5 <- ifelse(substring(CSS[['css.col5']], 1, 1) == '+', paste0(css.col5, substring(CSS[['css.col5']], 2)), CSS[['css.col5']]) if (!is.null(CSS[['css.col6']])) css.col6 <- ifelse(substring(CSS[['css.col6']], 1, 1) == '+', paste0(css.col6, substring(CSS[['css.col6']], 2)), CSS[['css.col6']]) if (!is.null(CSS[['css.summary']])) css.summary <- ifelse(substring(CSS[['css.summary']], 1, 1) == '+', paste0(css.summary, substring(CSS[['css.summary']], 2)), CSS[['css.summary']]) if (!is.null(CSS[['css.summarydata']])) css.summarydata <- ifelse(substring(CSS[['css.summarydata']], 1, 1) == '+', paste0(css.summarydata, substring(CSS[['css.summarydata']], 2)), CSS[['css.summarydata']]) if (!is.null(CSS[['css.fixedparts']])) css.fixedparts <- ifelse(substring(CSS[['css.fixedparts']], 1, 1) == '+', paste0(css.fixedparts, substring(CSS[['css.fixedparts']], 2)), CSS[['css.fixedparts']]) if (!is.null(CSS[['css.randomparts']])) css.randomparts <- ifelse(substring(CSS[['css.randomparts']], 1, 1) == '+', paste0(css.randomparts, substring(CSS[['css.randomparts']], 2)), CSS[['css.randomparts']]) if (!is.null(CSS[['css.zeroparts']])) css.zeroparts <- ifelse(substring(CSS[['css.zeroparts']], 1, 1) == '+', paste0(css.zeroparts, substring(CSS[['css.zeroparts']], 2)), CSS[['css.zeroparts']]) if (!is.null(CSS[['css.simplexparts']])) css.simplexparts <- ifelse(substring(CSS[['css.simplexparts']], 1, 1) == '+', paste0(css.simplexparts, substring(CSS[['css.simplexparts']], 2)), CSS[['css.simplexparts']]) if (!is.null(CSS[['css.firstsumrow']])) css.firstsumrow <- ifelse(substring(CSS[['css.firstsumrow']], 1, 1) == '+', paste0(css.firstsumrow, substring(CSS[['css.firstsumrow']], 2)), CSS[['css.firstsumrow']]) if (!is.null(CSS[['css.labelcellborder']])) css.labelcellborder <- ifelse(substring(CSS[['css.labelcellborder']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.labelcellborder']], 2)), CSS[['css.labelcellborder']]) if (!is.null(CSS[['css.depvarhead']])) css.depvarhead <- ifelse(substring(CSS[['css.depvarhead']], 1, 1) == '+', paste0(css.depvarhead, substring(CSS[['css.depvarhead']], 2)), CSS[['css.depvarhead']]) if (!is.null(CSS[['css.depvarheadnodv']])) css.depvarheadnodv <- ifelse(substring(CSS[['css.depvarheadnodv']], 1, 1) == '+', paste0(css.depvarheadnodv, substring(CSS[['css.depvarheadnodv']], 2)), CSS[['css.depvarheadnodv']]) if (!is.null(CSS[['css.modelcolumn1']])) css.modelcolumn1 <- ifelse(substring(CSS[['css.modelcolumn1']], 1, 1) == '+', paste0(css.modelcolumn1, substring(CSS[['css.modelcolumn1']], 2)), CSS[['css.modelcolumn1']]) if (!is.null(CSS[['css.modelcolumn2']])) css.modelcolumn2 <- ifelse(substring(CSS[['css.modelcolumn2']], 1, 1) == '+', paste0(css.modelcolumn2, substring(CSS[['css.modelcolumn2']], 2)), CSS[['css.modelcolumn2']]) if (!is.null(CSS[['css.modelcolumn3']])) css.modelcolumn3 <- ifelse(substring(CSS[['css.modelcolumn3']], 1, 1) == '+', paste0(css.modelcolumn3, substring(CSS[['css.modelcolumn3']], 2)), CSS[['css.modelcolumn3']]) if (!is.null(CSS[['css.modelcolumn4']])) css.modelcolumn4 <- ifelse(substring(CSS[['css.modelcolumn4']], 1, 1) == '+', paste0(css.modelcolumn4, substring(CSS[['css.modelcolumn4']], 2)), CSS[['css.modelcolumn4']]) if (!is.null(CSS[['css.modelcolumn5']])) css.modelcolumn5 <- ifelse(substring(CSS[['css.modelcolumn5']], 1, 1) == '+', 
paste0(css.modelcolumn5, substring(CSS[['css.modelcolumn5']], 2)), CSS[['css.modelcolumn5']]) if (!is.null(CSS[['css.modelcolumn6']])) css.modelcolumn6 <- ifelse(substring(CSS[['css.modelcolumn6']], 1, 1) == '+', paste0(css.modelcolumn6, substring(CSS[['css.modelcolumn6']], 2)), CSS[['css.modelcolumn6']]) if (!is.null(CSS[['css.modelcolumn7']])) css.modelcolumn7 <- ifelse(substring(CSS[['css.modelcolumn7']], 1, 1) == '+', paste0(css.modelcolumn7, substring(CSS[['css.modelcolumn7']], 2)), CSS[['css.modelcolumn7']]) } # set style sheet if (task == 1) { content <- sprintf( "", tag.table, css.table, tag.caption, css.caption, tag.td, css.td, tag.thead, css.thead, tag.tdata, css.tdata, tag.arc, css.arc, tag.summary, css.summary, tag.summarydata, css.summarydata, tag.fixedparts, css.fixedparts, tag.randomparts, css.randomparts, tag.zeroparts, css.zeroparts, tag.simplexparts, css.simplexparts, tag.lasttablerow, css.lasttablerow, tag.firsttablerow, css.firsttablerow, tag.firstsumrow, css.firstsumrow, tag.labelcellborder, css.labelcellborder, tag.depvarhead, css.depvarhead, tag.depvarheadnodv, css.depvarheadnodv, tag.leftalign, css.leftalign, tag.centeralign, css.centeralign, tag.firsttablecol, css.firsttablecol, tag.footnote, css.footnote, tag.subtitle, css.subtitle, tag.modelcolumn1, css.modelcolumn1, tag.modelcolumn2, css.modelcolumn2, tag.modelcolumn3, css.modelcolumn3, tag.modelcolumn4, css.modelcolumn4, tag.modelcolumn5, css.modelcolumn5, tag.modelcolumn6, css.modelcolumn6, tag.modelcolumn7, css.modelcolumn7, tag.col1, css.col1, tag.col2, css.col2, tag.col3, css.col3, tag.col4, css.col4, tag.col5, css.col5, tag.col6, css.col6 ) } else if (task == 2) { # set style attributes for main table tags content <- gsub("class=", "style=", content, fixed = TRUE, useBytes = TRUE) content <- gsub("% #' group_by(c161sex) %>% #' select(start:end) %>% #' plot_stackfrq() #' #' @import ggplot2 #' @importFrom sjmisc frq #' @importFrom scales percent #' @importFrom stats na.omit #' @importFrom rlang .data #' @export plot_stackfrq <- function(items, title = NULL, legend.title = NULL, legend.labels = NULL, axis.titles = NULL, axis.labels = NULL, weight.by = NULL, sort.frq = NULL, wrap.title = 50, wrap.labels = 30, wrap.legend.title = 30, wrap.legend.labels = 28, geom.size = 0.5, geom.colors = "Blues", show.prc = TRUE, show.n = FALSE, show.total = TRUE, show.axis.prc = TRUE, show.legend = TRUE, grid.breaks = 0.2, expand.grid = FALSE, digits = 1, vjust = "center", coord.flip = TRUE) { # check param. 
if we have a single vector instead of # a data frame with several items, convert vector to data frame if (!is.data.frame(items) && !is.matrix(items)) items <- as.data.frame(items) pl <- NULL if (inherits(items, "grouped_df")) { # get grouped data grps <- get_grouped_data(items) # now plot everything for (i in seq_len(nrow(grps))) { # copy back labels to grouped data frame tmp <- sjlabelled::copy_labels(grps$data[[i]], items) # prepare argument list, including title tmp.title <- get_grouped_plottitle(items, grps, i, sep = "\n") # plot plots <- .plot_stackfrq_helper( items = tmp, title = tmp.title, legend.title = legend.title, legend.labels = legend.labels, axis.titles = axis.titles, axis.labels = axis.labels, weight.by = weight.by, sort.frq = sort.frq, wrap.title = wrap.title, wrap.labels = wrap.labels, wrap.legend.title = wrap.legend.title, wrap.legend.labels = wrap.legend.labels, geom.size = geom.size, geom.colors = geom.colors, show.prc = show.prc, show.n = show.n, show.total = show.total, show.axis.prc = show.axis.prc, show.legend = show.legend, grid.breaks = grid.breaks, expand.grid = expand.grid, digits = digits, vjust = vjust, coord.flip = coord.flip ) # add plots, check for NULL results pl <- list(pl, plots) } } else { pl <- .plot_stackfrq_helper( items = items, title = title, legend.title = legend.title, legend.labels = legend.labels, axis.titles = axis.titles, axis.labels = axis.labels, weight.by = weight.by, sort.frq = sort.frq, wrap.title = wrap.title, wrap.labels = wrap.labels, wrap.legend.title = wrap.legend.title, wrap.legend.labels = wrap.legend.labels, geom.size = geom.size, geom.colors = geom.colors, show.prc = show.prc, show.n = show.n, show.total = show.total, show.axis.prc = show.axis.prc, show.legend = show.legend, grid.breaks = grid.breaks, expand.grid = expand.grid, digits = digits, vjust = vjust, coord.flip = coord.flip ) } pl } .plot_stackfrq_helper <- function( items, title, legend.title, legend.labels, axis.titles, axis.labels, weight.by, sort.frq, wrap.title, wrap.labels, wrap.legend.title, wrap.legend.labels, geom.size, geom.colors, show.prc, show.n, show.total, show.axis.prc, show.legend, grid.breaks, expand.grid, digits, vjust, coord.flip) { # copy titles if (is.null(axis.titles)) { axisTitle.x <- NULL axisTitle.y <- NULL } else { axisTitle.x <- axis.titles[1] if (length(axis.titles) > 1) axisTitle.y <- axis.titles[2] else axisTitle.y <- NULL } # check sorting if (!is.null(sort.frq)) { if (sort.frq == "first.asc") { sort.frq <- "first" reverseOrder <- FALSE } else if (sort.frq == "first.desc") { sort.frq <- "first" reverseOrder <- TRUE } else if (sort.frq == "last.asc") { sort.frq <- "last" reverseOrder <- TRUE } else if (sort.frq == "last.desc") { sort.frq <- "last" reverseOrder <- FALSE } else { sort.frq <- NULL reverseOrder <- FALSE } } else { reverseOrder <- FALSE } # try to automatically set labels if not passed as parameter if (is.null(legend.labels)) legend.labels <- sjlabelled::get_labels( items[[1]], attr.only = F, values = NULL, non.labelled = T ) if (is.null(axis.labels)) { axis.labels <- sjlabelled::get_label(items, def.value = colnames(items)) } # unname labels, if necessary, so we have a simple # character vector if (!is.null(names(axis.labels))) axis.labels <- as.vector(axis.labels) # unname labels, if necessary, so we have a simple # character vector if (!is.null(legend.labels) && !is.null(names(legend.labels))) legend.labels <- as.vector(legend.labels) # if we have no legend labels, we iterate all data frame's # columns to find all unique 
items of the data frame. # In case one item has missing categories, this may be # "compensated" by looking at all items, so we have the # actual values of all items. if (is.null(legend.labels)) { legend.labels <- as.character(sort(unique(unlist( apply(items, 2, function(x) unique(stats::na.omit(x))))))) } # if we have legend labels, we know the exact # amount of groups countlen <- length(legend.labels) # create cross table for stats, summary etc. # and weight variable. do this for each item that was # passed as parameter if (is.null(weight.by)) { dummy <- sjmisc::frq(items, show.na = TRUE) dummy <- lapply(dummy, function(.i) .i[-nrow(.i), ]) } else { items$weights <- weight.by dummy <- sjmisc::frq(items, weights = items$weights, show.na = TRUE) dummy <- lapply(dummy, function(.i) .i[-nrow(.i), ]) } dummy <- lapply(1:length(dummy), function(.i) { dummy[[.i]]$grp <- .i dummy[[.i]]$ypos <- (cumsum(dummy[[.i]]$valid.prc) - 0.5 * dummy[[.i]]$valid.prc) / 100 dummy[[.i]] }) mydat <- do.call(rbind, dummy) # remove NA row mydat <- mydat[!is.na(mydat$ypos), ] mydat$grp <- as.factor(mydat$grp) mydat$cat <- as.factor(mydat$val) mydat$prc <- mydat$valid.prc / 100 # Check whether N of each item should be included into # axis labels if (show.total) { for (i in seq_len(length(axis.labels))) { axis.labels[i] <- paste(axis.labels[i], sprintf(" (n=%i)", sum(dummy[[i]]$frq, na.rm = TRUE)), sep = "") } } # Prepare and trim legend labels to appropriate size # wrap legend text lines legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels) # check whether we have a title for the legend # if yes, wrap legend title line if (!is.null(legend.title)) legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title) # check length of diagram title and split longer string at into new lines # every 50 chars if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title) # check length of x-axis-labels and split longer strings at into new lines # every 10 chars, so labels don't overlap if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # Check if ordering was requested if (!is.null(sort.frq)) { # order by first cat if (sort.frq == "first") { facord <- order(mydat$prc[which(mydat$cat == 1)]) } else { # order by last cat facord <- order(mydat$prc[which(mydat$cat == countlen)]) } # create dummy vectors from 1 to itemlength dummy1 <- dummy2 <- seq_len(length(facord)) # facords holds the ordered item indices! we now need to # change the original item-index with its ordered position index. # example: # we have 4 items, and they may be ordered like this: # 1 3 4 2 # so the first item is the one with the lowest count , item 3 is on second postion, # item 4 is on third position and item 2 is the last item (with highest count) # we now need their order as subsequent vector: 1 4 2 3 # (i.e. item 1 is on first pos, item 2 is on fourth pos, item 3 is on # second pos and item 4 is on third pos in order) if (reverseOrder) { dummy2[rev(facord)] <- dummy1 } else { dummy2[facord] <- dummy1 } # now we have the order of either lowest to highest counts of first # or last category of "items". 
We now need to repeat these values as # often as we have answer categories orderedrow <- unlist(tapply(dummy2, seq_len(length(dummy2)), function(x) rep(x, countlen))) # replace old grp-order by new order mydat$grp <- as.factor(orderedrow) # reorder axis labels as well axis.labels <- axis.labels[order(dummy2)] } # check if category-oder on x-axis should be reversed # change category label order then if (reverseOrder && is.null(sort.frq)) axis.labels <- rev(axis.labels) # set diagram margins if (expand.grid) { expgrid <- waiver() } else { expgrid <- c(0, 0) } # Set value labels and label digits mydat$digits <- digits if (show.prc && !show.n) { ggvaluelabels <- geom_text( aes(y = .data$ypos, label = sprintf("%.*f%%", .data$digits, 100 * .data$prc)), vjust = vjust ) } else if (show.n && !show.prc) { ggvaluelabels <- geom_text( aes(y = .data$ypos, label = sprintf("%i", as.integer(.data$frq))), vjust = vjust ) } else if (show.n && show.prc) { ggvaluelabels <- geom_text( aes(y = .data$ypos, label = sprintf("%.*f%% (n=%i)", .data$digits, 100 * .data$prc, as.integer(.data$frq))), vjust = vjust ) } else { ggvaluelabels <- geom_text(aes(y = .data$ypos), label = "") } # Set up grid breaks if (is.null(grid.breaks)) { gridbreaks <- waiver() } else { gridbreaks <- c(seq(0, 1, by = grid.breaks)) } # check if category-oder on x-axis should be reversed # change x axis order then if (reverseOrder && is.null(sort.frq)) { baseplot <- ggplot(mydat, aes(x = rev(.data$grp), y = .data$prc, fill = .data$cat)) } else { baseplot <- ggplot(mydat, aes(x = .data$grp, y = .data$prc, fill = .data$cat)) } baseplot <- baseplot + # plot bar chart geom_bar(stat = "identity", position = position_stack(reverse = TRUE), width = geom.size) # show/hide percentage values on x axis if (show.axis.prc) perc.val <- scales::percent else perc.val <- NULL # start plot here baseplot <- baseplot + # show absolute and percentage value of each bar. ggvaluelabels + # no additional labels for the x- and y-axis, only diagram title labs(title = title, x = axisTitle.x, y = axisTitle.y, fill = legend.title) + # print value labels to the x-axis. # If parameter "axis.labels" is NULL, the category numbers (1 to ...) # appear on the x-axis scale_x_discrete(labels = axis.labels) + # set Y-axis, depending on the calculated upper y-range. # It either corresponds to the maximum amount of cases in the data set # (length of var) or to the highest count of var's categories. scale_y_continuous(breaks = gridbreaks, limits = c(-0.02, 1.02), expand = expgrid, labels = perc.val) # check whether coordinates should be flipped, i.e. # swap x and y axis if (coord.flip) baseplot <- baseplot + coord_flip() # set geom colors sj.setGeomColors( baseplot, geom.colors, length(legend.labels), show.legend, legend.labels ) } sjPlot/R/tab_corr.R0000644000176200001440000005456113662304072013662 0ustar liggesusers#' @title Summary of correlations as HTML table #' @name tab_corr #' #' @description Shows the results of a computed correlation as HTML table. Requires either #' a \code{\link{data.frame}} or a matrix with correlation coefficients #' as returned by the \code{\link{cor}}-function. #' #' @param data Matrix with correlation coefficients as returned by the #' \code{\link{cor}}-function, or a \code{data.frame} of variables where #' correlations between columns should be computed. #' @param na.deletion Indicates how missing values are treated. May be either #' \code{"listwise"} (default) or \code{"pairwise"}. May be #' abbreviated. 
#' @param corr.method Indicates the correlation computation method. May be one of #' \code{"pearson"} (default), \code{"spearman"} or \code{"kendall"}. #' May be abbreviated. #' @param p.numeric Logical, if \code{TRUE}, the p-values are printed #' as numbers. If \code{FALSE} (default), asterisks are used. #' @param fade.ns Logical, if \code{TRUE} (default), non-significant correlation-values #' appear faded (by using a lighter grey text color). See 'Note'. #' @param triangle Indicates whether only the upper right (use \code{"upper"}), lower left (use \code{"lower"}) #' or both (use \code{"both"}) triangles of the correlation table are filled with values. Default #' is \code{"both"}. You can specify the initial letter only. #' @param val.rm Specify a number between 0 and 1 to suppress the output of correlation values #' that are smaller than \code{val.rm}. The absolute correlation values are used, so #' a correlation value of \code{-.5} would be greater than \code{val.rm = .4} and thus not be #' omitted. By default, this argument is \code{NULL}, hence all values are shown in the table. #' If a correlation value is below the specified value of \code{val.rm}, it is still printed to #' the HTML table, but made "invisible" with white foreground color. You can use the \code{CSS} #' argument (\code{"css.valueremove"}) to change color and appearance of those correlation values that are smaller than #' the limit specified by \code{val.rm}. #' @param string.diag A vector with string values of the same length as \code{ncol(data)} (number of #' correlated items) that can be used to display content in the diagonal cells #' where row and column item are identical (i.e. the "self-correlation"). By default, #' this argument is \code{NULL} and the diagonal cells are empty. #' #' @inheritParams tab_model #' @inheritParams tab_xtab #' @inheritParams plot_grpfrq #' @inheritParams plot_gpt #' #' @return Invisibly returns #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the web page content (\code{page.content}), #' \item the complete html-output (\code{page.complete}) and #' \item the html-table with inline-css for use with knitr (\code{knitr}) #' } #' for further use. #' #' @note If \code{data} is a matrix with correlation coefficients as returned by #' the \code{\link{cor}}-function, p-values can't be computed. #' Thus, \code{show.p}, \code{p.numeric} and \code{fade.ns} #' only have an effect if \code{data} is a \code{\link{data.frame}}. #' #' @examples #' \dontrun{ #' if (interactive()) { #' # Data from the EUROFAMCARE sample dataset #' library(sjmisc) #' data(efc) #' #' # retrieve variable and value labels #' varlabs <- get_label(efc) #' #' # retrieve first item of COPE-index scale #' start <- which(colnames(efc) == "c83cop2") #' # retrieve last item of COPE-index scale #' end <- which(colnames(efc) == "c88cop7") #' #' # create data frame with COPE-index scale #' mydf <- data.frame(efc[, c(start:end)]) #' colnames(mydf) <- varlabs[c(start:end)] #' #' # we have high correlations here, because all items #' # belong to one factor.
#' tab_corr(mydf, p.numeric = TRUE) #' #' # auto-detection of labels, only lower triangle #' tab_corr(efc[, c(start:end)], triangle = "lower") #' #' # auto-detection of labels, only lower triangle, all correlation #' # values smaller than 0.3 are not shown in the table #' tab_corr(efc[, c(start:end)], triangle = "lower", val.rm = 0.3) #' #' # auto-detection of labels, only lower triangle, all correlation #' # values smaller than 0.3 are printed in blue #' tab_corr(efc[, c(start:end)], triangle = "lower",val.rm = 0.3, #' CSS = list(css.valueremove = 'color:blue;')) #' }} #' @importFrom stats na.omit cor cor.test #' @export tab_corr <- function(data, na.deletion = c("listwise", "pairwise"), corr.method = c("pearson", "spearman", "kendall"), title = NULL, var.labels = NULL, wrap.labels = 40, show.p = TRUE, p.numeric = FALSE, fade.ns = TRUE, val.rm = NULL, digits = 3, triangle = "both", string.diag = NULL, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE) { # -------------------------------------------------------- # check p-value-style option # -------------------------------------------------------- opt <- getOption("p_zero") if (is.null(opt) || opt == FALSE) { p_zero <- "" } else { p_zero <- "0" } # -------------------------------------------------------- # check args # -------------------------------------------------------- na.deletion <- match.arg(na.deletion) corr.method <- match.arg(corr.method) # -------------------------------------------------------- # check encoding # -------------------------------------------------------- encoding <- get.encoding(encoding, data) # -------------------------------------------------------- # argument check # -------------------------------------------------------- if (is.null(triangle)) { triangle <- "both" } else if (triangle == "u" || triangle == "upper") { triangle <- "upper" } else if (triangle == "l" || triangle == "lower") { triangle <- "lower" } else triangle <- "both" # -------------------------------------------------------- # try to automatically set labels is not passed as argument # -------------------------------------------------------- if (is.null(var.labels) && is.data.frame(data)) { var.labels <- sjlabelled::get_label(data, def.value = colnames(data)) } # ---------------------------- # check for valid argument # ---------------------------- if (corr.method != "pearson" && corr.method != "spearman" && corr.method != "kendall") { stop("argument 'corr.method' must be one of: pearson, spearman or kendall") } # ---------------------------- # check if user has passed a data frame # or a pca object # ---------------------------- if (is.matrix(data)) { corr <- data cpvalues <- NULL } else { # missing deletion corresponds to # SPSS listwise if (na.deletion == "listwise") { data <- stats::na.omit(data) corr <- stats::cor(data, method = corr.method) } else { # missing deletion corresponds to # SPSS pairwise corr <- stats::cor(data, method = corr.method, use = "pairwise.complete.obs") } #--------------------------------------- # if we have a data frame as argument, # compute p-values of significances #--------------------------------------- computePValues <- function(df) { cp <- c() for (i in 1:ncol(df)) { pv <- c() for (j in 1:ncol(df)) { test <- suppressWarnings( stats::cor.test( df[[i]], df[[j]], alternative = "two.sided", method = corr.method ) ) pv <- cbind(pv, round(test$p.value, 5)) } cp <- rbind(cp, pv) } return(cp) } cpvalues <- computePValues(data) } # 
-------------------------------------------------------- # save original p-values # -------------------------------------------------------- cpv <- cpvalues # -------------------------------------------------------- # add column with significance value # -------------------------------------------------------- if (!is.null(cpvalues)) { if (!p.numeric) { # -------------------------------------------------------- # prepare function for apply-function. replace sig. p # with asterisks # -------------------------------------------------------- fun.star <- function(x) { x <- get_p_stars(x) } } else { # -------------------------------------------------------- # prepare function for apply-function. # round p-values, keeping the numeric values # -------------------------------------------------------- fun.star <- function(x) { round(x, digits) } } cpvalues <- apply(cpvalues, c(1,2), fun.star) if (p.numeric) { cpvalues <- apply( cpvalues, c(1,2), function(x) { if (x < 0.001) x <- sprintf("<%s.001", p_zero) else x <- sub("0", p_zero, sprintf("%.*f", digits, x)) } ) } } else { show.p <- FALSE } # ---------------------------- # check if user defined labels have been supplied # if not, use variable names from data frame # ---------------------------- if (is.null(var.labels)) { var.labels <- row.names(corr) } # check length of x-axis-labels and split longer strings at into new lines var.labels <- sjmisc::word_wrap(var.labels, wrap.labels, "
") # ------------------------------------- # init header # ------------------------------------- toWrite <- table.header <- sprintf("\n\n\n", encoding) # ------------------------------------- # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value # ------------------------------------- tag.table <- "table" tag.caption <- "caption" tag.thead <- "thead" tag.tdata <- "tdata" tag.notsig <- "notsig" tag.pval <- "pval" tag.valueremove <- "valueremove" tag.summary <- "summary" tag.centeralign <- "centeralign" tag.firsttablecol <- "firsttablecol" css.table <- "border-collapse:collapse; border:none;" css.thead <- "font-style:italic; font-weight:normal; border-top:double black; border-bottom:1px solid black; padding:0.2cm;" css.tdata <- "padding:0.2cm;" css.caption <- "font-weight: bold; text-align:left;" css.valueremove <- "color:white;" css.centeralign <- "text-align:center;" css.firsttablecol <- "font-style:italic;" css.notsig <- "color:#999999;" css.summary <- "border-bottom:double black; border-top:1px solid black; font-style:italic; font-size:0.9em; text-align:right;" css.pval <- "vertical-align:super;font-size:0.8em;" if (p.numeric) css.pval <- "font-style:italic;" # ------------------------ # check user defined style sheets # ------------------------ if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.table']], 2)), CSS[['css.table']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']], 1, 1) == '+', paste0(css.thead, substring(CSS[['css.thead']], 2)), CSS[['css.thead']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']], 1, 1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']], 2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']], 1, 1) == '+', paste0(css.caption, substring(CSS[['css.caption']], 2)), CSS[['css.caption']]) if (!is.null(CSS[['css.summary']])) css.summary <- ifelse(substring(CSS[['css.summary']], 1, 1) == '+', paste0(css.summary, substring(CSS[['css.summary']], 2)), CSS[['css.summary']]) if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']], 1, 1) == '+', paste0(css.centeralign, substring(CSS[['css.centeralign']], 2)), CSS[['css.centeralign']]) if (!is.null(CSS[['css.firsttablecol']])) css.firsttablecol <- ifelse(substring(CSS[['css.firsttablecol']], 1, 1) == '+', paste0(css.firsttablecol, substring(CSS[['css.firsttablecol']], 2)), CSS[['css.firsttablecol']]) if (!is.null(CSS[['css.notsig']])) css.notsig <- ifelse(substring(CSS[['css.notsig']], 1, 1) == '+', paste0(css.notsig, substring(CSS[['css.notsig']], 2)), CSS[['css.notsig']]) if (!is.null(CSS[['css.pval']])) css.pval <- ifelse(substring(CSS[['css.pval']], 1, 1) == '+', paste0(css.pval, substring(CSS[['css.pval']], 2)), CSS[['css.pval']]) if (!is.null(CSS[['css.valueremove']])) css.valueremove <- ifelse(substring(CSS[['css.valueremove']], 1, 1) == '+', paste0(css.valueremove, substring(CSS[['css.valueremove']], 2)), CSS[['css.valueremove']]) } # ------------------------ # set page style # ------------------------ page.style <- sprintf("", tag.table, css.table, tag.caption, css.caption, tag.thead, css.thead, tag.tdata, css.tdata, tag.firsttablecol, css.firsttablecol, tag.centeralign, css.centeralign, tag.notsig, css.notsig, tag.pval, css.pval, tag.summary, css.summary, 
tag.valueremove, css.valueremove) # ------------------------ # start content # ------------------------ toWrite <- paste0(toWrite, page.style) toWrite = paste(toWrite, "\n\n", "\n") # ------------------------------------- # start table tag # ------------------------------------- page.content <- "\n" # ------------------------------------- # table caption, variable label # ------------------------------------- if (!is.null(title)) page.content <- paste0(page.content, sprintf(" \n", title)) # ------------------------------------- # header row # ------------------------------------- # write tr-tag page.content <- paste0(page.content, " \n") # first column page.content <- paste0(page.content, " \n") # iterate columns for (i in 1:ncol(corr)) { page.content <- paste0(page.content, sprintf(" \n", var.labels[i])) } # close table row page.content <- paste0(page.content, " \n") # ------------------------------------- # data rows # ------------------------------------- # iterate all rows of df for (i in 1:nrow(corr)) { # write tr-tag page.content <- paste0(page.content, " \n") # print first table cell page.content <- paste0(page.content, sprintf(" \n", var.labels[i])) # -------------------------------------------------------- # iterate all columns # -------------------------------------------------------- for (j in 1:ncol(corr)) { # -------------------------------------------------------- # leave out self-correlations # -------------------------------------------------------- if (j == i) { if (is.null(string.diag) || length(string.diag) > ncol(corr)) { page.content <- paste0(page.content, " \n") } else { page.content <- paste0(page.content, sprintf(" \n", string.diag[j])) } } else { # -------------------------------------------------------- # check whether only lower or upper triangle of correlation # table should be printed # -------------------------------------------------------- if ((triangle == "upper" && j > i) || (triangle == "lower" && i > j) || triangle == "both") { # -------------------------------------------------------- # print table-cell-data (cor-value) # -------------------------------------------------------- cellval <- sprintf("%.*f", digits, corr[i, j]) # -------------------------------------------------------- # check whether we want to show P-Values # -------------------------------------------------------- if (show.p) { if (p.numeric) { # -------------------------------------------------------- # if we have p-values as number, print them in new row # -------------------------------------------------------- cellval <- sprintf("%s
(%s)", cellval, cpvalues[i, j]) } else { # -------------------------------------------------------- # if we have p-values as "*", add them # -------------------------------------------------------- cellval <- sprintf("%s%s", cellval, cpvalues[i, j]) } } # -------------------------------------------------------- # prepare css for not significant values # -------------------------------------------------------- notsig <- "" # -------------------------------------------------------- # check whether non significant values should be blurred # -------------------------------------------------------- if (fade.ns && !is.null(cpv)) { # set css-class-attribute if (cpv[i, j] >= 0.05) notsig <- " notsig" } # -------------------------------------------------------- # prepare css for values that shoould be removed due to low # correlation value # -------------------------------------------------------- value.remove <- "" # -------------------------------------------------------- # check whether correlation value is too small and should # be omitted # -------------------------------------------------------- if (!is.null(val.rm) && abs(corr[i, j]) < abs(val.rm)) { value.remove <- " valueremove" } page.content <- paste0(page.content, sprintf(" \n", notsig, value.remove, cellval)) } else { page.content <- paste0(page.content, " \n") } } } # close row page.content <- paste0(page.content, " \n") } # ------------------------------------- # feedback... # ------------------------------------- page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" \n \n") # ------------------------------------- # finish table # ------------------------------------- page.content <- paste(page.content, "\n
%s
 %s
%s %s%s 
", ncol(corr) + 1)) page.content <- paste0(page.content, sprintf("Computed correlation used %s-method with %s-deletion.", corr.method, na.deletion)) page.content <- paste0(page.content, "
") # ------------------------------------- # finish html page # ------------------------------------- toWrite <- paste(toWrite, page.content, "\n") toWrite <- paste0(toWrite, "") # ------------------------------------- # replace class attributes with inline style, # useful for knitr # ------------------------------------- # copy page content # ------------------------------------- knitr <- page.content # ------------------------------------- # set style attributes for main table tags # ------------------------------------- knitr <- gsub("class=", "style=", knitr, fixed = TRUE, useBytes = TRUE) knitr <- gsub("") # ---------------------------- # if we have no variable labels, use row names # ---------------------------- if (is.null(var.labels)) var.labels <- colnames(items) # check length of x-axis-labels and split longer strings at into new lines var.labels <- sjmisc::word_wrap(var.labels, wrap.labels, "
") # ---------------------------- # additional statistics required from psych-package? # ---------------------------- if (show.skew) pstat_skewness <- datawizard::skewness(items) if (show.kurtosis) pstat_kurtosis <- datawizard::kurtosis(items) if (is.null(weight.by)) { dummy <- sjmisc::frq(items, show.strings = TRUE, show.na = show.na) } else { items$weights <- weight.by dummy <- sjmisc::frq(items, weights = items$weights, show.strings = TRUE, show.na = show.na) } mat.n <- .transform_data(dummy, col = "frq") mat <- .transform_data(dummy, col = ifelse(isTRUE(show.na), "raw.prc", "valid.prc")) # ---------------------------- # Check if ordering was requested # ---------------------------- # default order facord <- seq_len(nrow(mat)) if (!is.null(sort.frq)) { if (sort.frq == "first") facord <- order(mat.n$V1) else facord <- order(mat.n[, ncol(mat.n)]) } # ---------------------------- # reverse order # ---------------------------- if (reverseOrder) facord <- rev(facord) # ------------------------------------- # init header # ------------------------------------- toWrite <- table.header <- sprintf("\n\n\n", encoding) # ------------------------------------- # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value # ------------------------------------- tag.table <- "table" tag.caption <- "caption" tag.thead <- "thead" tag.tdata <- "tdata" tag.arc <- "arc" tag.centeralign <- "centeralign" tag.firsttablecol <- "firsttablecol" tag.ncol <- "ncol" tag.skewcol <- "skewcol" tag.kurtcol <- "kurtcol" tag.summary <- "summary" css.table <- "border-collapse:collapse; border:none; border-bottom:double black;" css.caption <- "font-weight: bold; text-align:left;" css.thead <- "border-top:double black; border-bottom:1px solid black; padding:0.2cm;" css.tdata <- "padding:0.2cm;" css.arc <- "background-color:#eaeaea;" css.centeralign <- "text-align:center;" css.firsttablecol <- "font-style:italic;" css.ncol <- "" css.summary <- "" css.skewcol <- "" css.kurtcol <- "" # ------------------------ # check user defined style sheets # ------------------------ if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']],1,1) == '+', paste0(css.table, substring(CSS[['css.table']],2)), CSS[['css.table']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']],1,1) == '+', paste0(css.thead, substring(CSS[['css.thead']],2)), CSS[['css.thead']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']],1,1) == '+', paste0(css.caption, substring(CSS[['css.caption']],2)), CSS[['css.caption']]) if (!is.null(CSS[['css.summary']])) css.summary <- ifelse(substring(CSS[['css.summary']],1,1) == '+', paste0(css.summary, substring(CSS[['css.summary']],2)), CSS[['css.summary']]) if (!is.null(CSS[['css.arc']])) css.arc <- ifelse(substring(CSS[['css.arc']],1,1) == '+', paste0(css.arc, substring(CSS[['css.arc']],2)), CSS[['css.arc']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']],1,1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']],2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']],1,1) == '+', paste0(css.centeralign, substring(CSS[['css.centeralign']],2)), CSS[['css.centeralign']]) if (!is.null(CSS[['css.firsttablecol']])) css.firsttablecol <- ifelse(substring(CSS[['css.firsttablecol']],1,1) == '+', paste0(css.firsttablecol, 
substring(CSS[['css.firsttablecol']],2)), CSS[['css.firsttablecol']]) if (!is.null(CSS[['css.ncol']])) css.ncol <- ifelse(substring(CSS[['css.ncol']],1,1) == '+', paste0(css.ncol, substring(CSS[['css.ncol']],2)), CSS[['css.ncol']]) if (!is.null(CSS[['css.skewcol']])) css.skewcol <- ifelse(substring(CSS[['css.skewcol']],1,1) == '+', paste0(css.skewcol, substring(CSS[['css.skewcol']],2)), CSS[['css.skewcol']]) if (!is.null(CSS[['css.kurtcol']])) css.kurtcol <- ifelse(substring(CSS[['css.kurtcol']],1,1) == '+', paste0(css.kurtcol, substring(CSS[['css.kurtcol']],2)), CSS[['css.kurtcol']]) } # ------------------------ # set page style # ------------------------ page.style <- sprintf("", tag.table, css.table, tag.caption, css.caption, tag.thead, css.thead, tag.tdata, css.tdata, tag.firsttablecol, css.firsttablecol, tag.arc, css.arc, tag.centeralign, css.centeralign, tag.ncol, css.ncol, tag.summary, css.summary, tag.kurtcol, css.kurtcol, tag.skewcol, css.skewcol) # ------------------------ # start content # ------------------------ toWrite <- paste0(toWrite, page.style) toWrite = paste(toWrite, "\n\n", "\n") # ------------------------------------- # start table tag # ------------------------------------- page.content <- "\n" # ------------------------------------- # table caption # ------------------------------------- if (!is.null(title)) page.content <- paste(page.content, sprintf(" \n", title)) # ------------------------------------- # header row # ------------------------------------- # write tr-tag page.content <- paste0(page.content, " \n") # first column page.content <- paste0(page.content, " \n") # iterate columns for (i in 1:catcount) { page.content <- paste0(page.content, sprintf(" \n", value.labels[i])) } # add N column if (show.total) page.content <- paste0(page.content, sprintf(" \n", string.total)) # add skew column if (show.skew) page.content <- paste0(page.content, " \n") # add kurtosis column if (show.kurtosis) page.content <- paste0(page.content, " \n") # close table row page.content <- paste0(page.content, " \n") # ------------------------------------- # data rows # ------------------------------------- # iterate all rows of df for (i in seq_len(nrow(mat))) { # default row string for alternative row colors arcstring <- "" # if we have alternating row colors, set css if (alternate.rows) arcstring <- ifelse(sjmisc::is_even(i), " arc", "") # write tr-tag page.content <- paste0(page.content, " \n") # print first table cell page.content <- paste0(page.content, sprintf(" \n", arcstring, var.labels[facord[i]])) # -------------------------------------------------------- # iterate all columns # -------------------------------------------------------- for (j in seq_len(ncol(mat))) { if (show.n) { page.content <- paste0(page.content, sprintf(" \n", arcstring, as.integer(mat.n[facord[i], j]), digits, mat[facord[i], j])) } else { page.content <- paste0(page.content, sprintf(" \n", arcstring, digits, mat[facord[i], j])) } } # add column with N's if (show.total) page.content <- paste0(page.content, sprintf(" \n", arcstring, as.integer(sum(mat.n[facord[i], ])))) # add column with Skew's if (show.skew) page.content <- paste0(page.content, sprintf(" \n", arcstring, digits.stats, pstat_skewness[facord[i]])) # add column with Kurtosis's if (show.kurtosis) page.content <- paste0(page.content, sprintf(" \n", arcstring, digits.stats, pstat_kurtosis[facord[i]])) # close row page.content <- paste0(page.content, " \n") } # ------------------------------------- # finish table # 
------------------------------------- page.content <- paste(page.content, "\n
%s
 %s%sSkewKurtosis
%s%i
(%.*f %%)
%.*f %%%i%.*f%.*f
") # ------------------------------------- # finish html page # ------------------------------------- toWrite <- paste(toWrite, page.content, "\n") toWrite <- paste0(toWrite, "") # ------------------------------------- # replace class attributes with inline style, # useful for knitr # ------------------------------------- # copy page content # ------------------------------------- knitr <- page.content # ------------------------------------- # set style attributes for main table tags # ------------------------------------- knitr <- gsub("class=", "style=", knitr, fixed = TRUE, useBytes = TRUE) knitr <- gsub("% tidyr::gather(key = "Term", value = "Estimate", !! gather.cols) # additional arguments? add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("alpha" %in% names(add.args)) alpha <- eval(add.args[["alpha"]]) if ("scale" %in% names(add.args)) scale <- eval(add.args[["scale"]]) if (!facets && requireNamespace("ggridges", quietly = TRUE)) { p <- ggplot(pp, aes_string(y = "Term", x = "Estimate", fill = "Sample")) + ggridges::geom_density_ridges2(alpha = alpha, rel_min_height = .005, scale = scale) + scale_fill_manual(values = col_check2(geom.colors, 2)) } else { p <- ggplot(pp, aes_string(x = "Estimate", fill = "Sample")) + geom_density(alpha = alpha) + scale_fill_manual(values = col_check2(geom.colors, 2)) if (!is.null(axis.labels) && !is.null(names(axis.labels))) { p <- p + facet_wrap(~Term, scales = "free", labeller = labeller(.default = label_value, Term = axis.labels)) } else { p <- p + facet_wrap(~Term, scales = "free") } } if (!is.null(axis.lim)) p <- p + scale_x_continuous(limits = axis.lim) p + xlab("Distribution") } sjPlot/R/sjPlotSetTheme.R0000644000176200001440000007137514104233216014774 0ustar liggesusers#' @title Set global theme options for sjp-functions #' @name set_theme #' #' @description Set global theme options for sjp-functions. #' #' @param base base theme where theme is built on. By default, all #' metrics from \code{theme_gray()} are used. See 'Details'. #' @param theme.font base font family for the plot. #' @param title.size size of plot title. Default is 1.3. #' @param title.color Color of plot title. Default is \code{"black"}. #' @param title.align alignment of plot title. Must be one of \code{"left"} (default), #' \code{"center"} or \code{"right"}. You may use initial letter only. #' @param title.vjust numeric, vertical adjustment for plot title. #' @param geom.outline.size size of bar outlines. Default is 0.1. Use #' size of \code{0} to remove geom outline. #' @param geom.outline.color Color of geom outline. Only applies, if \code{geom.outline.size} #' is larger than 0. #' @param geom.boxoutline.size size of outlines and median bar especially for boxplots. #' Default is 0.5. Use size of \code{0} to remove boxplot outline. #' @param geom.boxoutline.color Color of outlines and median bar especially for boxplots. #' Only applies, if \code{geom.boxoutline.size} is larger than 0. #' @param geom.alpha specifies the transparancy (alpha value) of geoms #' @param geom.linetype linetype of line geoms. Default is \code{1} (solid line). #' @param geom.errorbar.size size (thickness) of error bars. Default is \code{0.8} #' @param geom.errorbar.linetype linetype of error bars. Default is \code{1} (solid line). 
#' @param geom.label.color Color of geom's value and annotation labels #' @param geom.label.size size of geom's value and annotation labels #' @param geom.label.alpha alpha level of geom's value and annotation labels #' @param geom.label.angle angle of geom's value and annotation labels #' @param axis.title.color Color of x- and y-axis title labels #' @param axis.title.size size of x- and y-axis title labels #' @param axis.title.x.vjust numeric, vertical adjustment of x-axis-title. #' @param axis.title.y.vjust numeric, vertical adjustment of y-axis-title. #' @param axis.angle.x angle for x-axis labels #' @param axis.angle.y angle for y-axis labels #' @param axis.angle angle for x- and y-axis labels. If set, overrides both \code{axis.angle.x} and \code{axis.angle.y} #' @param axis.textcolor.x Color for x-axis labels. If not specified, a default dark gray #' color palette will be used for the labels. #' @param axis.textcolor.y Color for y-axis labels. If not specified, a default dark gray #' color palette will be used for the labels. #' @param axis.textcolor Color for both x- and y-axis labels. #' If set, overrides both \code{axis.textcolor.x} and \code{axis.textcolor.y} #' @param axis.linecolor.x Color of x-axis border #' @param axis.linecolor.y Color of y-axis border #' @param axis.linecolor Color for both x- and y-axis borders. #' If set, overrides both \code{axis.linecolor.x} and \code{axis.linecolor.y}. #' @param axis.line.size size (thickness) of axis lines. Only affected, if \code{axis.linecolor} #' is set. #' @param axis.textsize.x size of x-axis labels #' @param axis.textsize.y size of y-axis labels #' @param axis.textsize size for both x- and y-axis labels. #' If set, overrides both \code{axis.textsize.x} and \code{axis.textsize.y}. #' @param axis.ticksize.x size of tick marks at x-axis. #' @param axis.ticksize.y size of tick marks at y-axis. #' @param axis.tickslen length of axis tick marks #' @param axis.tickscol Color of axis tick marks #' @param axis.ticksmar margin between axis labels and tick marks #' @param panel.bordercol Color of whole diagram border (panel border) #' @param panel.backcol Color of the diagram's background #' @param panel.col Color of both diagram's border and background. #' If set, overrides both \code{panel.bordercol} and \code{panel.backcol}. #' @param panel.major.gridcol Color of the major grid lines of the diagram background #' @param panel.minor.gridcol Color of the minor grid lines of the diagram background #' @param panel.gridcol Color for both minor and major grid lines of the diagram background. #' If set, overrides both \code{panel.major.gridcol} and \code{panel.minor.gridcol}. #' @param panel.gridcol.x See \code{panel.gridcol}. #' @param panel.gridcol.y See \code{panel.gridcol}. #' @param panel.major.linetype line type for major grid lines #' @param panel.minor.linetype line type for minor grid lines #' @param plot.margins numeric vector of length 4, indicating the top, right, #' bottom and left margin of the plot region. #' @param plot.backcol Color of the plot's background #' @param plot.bordercol Color of whole plot's border (panel border) #' @param plot.col Color of both plot's region border and background. #' If set, overrides both \code{plot.backcol} and \code{plot.bordercol}. #' @param legend.pos position of the legend, if a legend is drawn. 
#' \describe{ #' \item{\emph{legend outside plot}}{ #' Use \code{"bottom"}, \code{"top"}, \code{"left"} or \code{"right"} #' to position the legend above, below, on the left or right side #' of the diagram. Right positioning is default. #' } #' \item{\emph{legend inside plot}}{ #' If \code{legend.inside = TRUE}, legend can be placed inside #' plot. Use \code{"top left"}, \code{"top right"}, \code{"bottom left"} #' and \code{"bottom right"} to position legend in any of these corners, #' or a two-element numeric vector with values from 0-1. See also #' \code{legend.inside}. #' } #' } #' @param legend.just justification of legend, relative to its position (\code{"center"} or #' two-element numeric vector with values from 0-1. By default (outside legend), #' justification is centered. If legend is inside and justification not specified, #' legend justification is set according to legend position. #' @param legend.inside logical, use \code{TRUE} to put legend inside the plotting area. See \code{legend.pos}. #' @param legend.size text size of the legend. Default is 1. Relative size, so #' recommended values are from 0.3 to 2.5 #' @param legend.color Color of the legend labels #' @param legend.title.size text size of the legend title #' @param legend.title.color Color of the legend title #' @param legend.title.face font face of the legend title. By default, \code{"bold"} face is used. #' @param legend.bordercol Color of the legend's border. Default is \code{"white"}, so no visible border is drawn. #' @param legend.backgroundcol fill color of the legend's background. Default is \code{"white"}, so no visible background is drawn. #' @param legend.item.size size of legend's item (legend key), in centimetres. #' @param legend.item.bordercol Color of the legend's item-border. Default is \code{"white"}. #' @param legend.item.backcol fill color of the legend's item-background. Default is \code{"grey90"}. #' #' @return The customized theme object, or \code{NULL}, if a ggplot-theme was used. #' #' @seealso \code{\link{sjPlot-themes}} #' #' @examples #' \dontrun{ #' library(sjmisc) #' data(efc) #' # set sjPlot-defaults, a slightly modification #' # of the ggplot base theme #' set_theme() #' #' # legends of all plots inside #' set_theme(legend.pos = "top left", legend.inside = TRUE) #' plot_xtab(efc$e42dep, efc$e16sex) #' #' # Use classic-theme. you may need to #' # load the ggplot2-library. 
#' library(ggplot2) #' set_theme(base = theme_classic()) #' plot_frq(efc$e42dep) #' #' # adjust value labels #' set_theme( #' geom.label.size = 3.5, #' geom.label.color = "#3366cc", #' geom.label.angle = 90 #' ) #' #' # hjust-aes needs adjustment for this #' update_geom_defaults('text', list(hjust = -0.1)) #' plot_xtab(efc$e42dep, efc$e16sex, vjust = "center", hjust = "center") #' #' # Create own theme based on classic-theme #' set_theme( #' base = theme_classic(), axis.linecolor = "grey50", #' axis.textcolor = "#6699cc" #' ) #' plot_frq(efc$e42dep)} #' #' @import ggplot2 #' @importFrom scales brewer_pal grey_pal #' @importFrom dplyr case_when #' @export set_theme <- function(base = theme_grey(), theme.font = NULL, # title defaults title.color = "black", title.size = 1.2, title.align = "left", title.vjust = NULL, # geom defaults # geom.colors=NULL, geom.outline.color = NULL, geom.outline.size = 0, geom.boxoutline.size = 0.5, geom.boxoutline.color = "black", geom.alpha = 1, geom.linetype = 1, geom.errorbar.size = 0.7, geom.errorbar.linetype = 1, # value labels geom.label.color = NULL, geom.label.size = 4, geom.label.alpha = 1, geom.label.angle = 0, # axis titles axis.title.color = "grey30", axis.title.size = 1.1, axis.title.x.vjust = NULL, axis.title.y.vjust = NULL, # axis text angle axis.angle.x = 0, axis.angle.y = 0, axis.angle = NULL, # axis text colors axis.textcolor.x = "grey30", axis.textcolor.y = "grey30", axis.textcolor = NULL, # axis line colors axis.linecolor.x = NULL, axis.linecolor.y = NULL, axis.linecolor = NULL, axis.line.size = 0.5, # axis text size axis.textsize.x = 1, axis.textsize.y = 1, axis.textsize = NULL, # axis ticks axis.tickslen = NULL, axis.tickscol = NULL, axis.ticksmar = NULL, axis.ticksize.x = NULL, axis.ticksize.y = NULL, # panel defaults panel.backcol = NULL, panel.bordercol = NULL, panel.col = NULL, panel.major.gridcol = NULL, panel.minor.gridcol = NULL, panel.gridcol = NULL, panel.gridcol.x = NULL, panel.gridcol.y = NULL, panel.major.linetype = 1, panel.minor.linetype = 1, # plot background color plot.backcol = NULL, plot.bordercol = NULL, plot.col = NULL, plot.margins = NULL, # legend legend.pos = "right", legend.just = NULL, legend.inside = FALSE, legend.size = 1, legend.color = "black", legend.title.size = 1, legend.title.color = "black", legend.title.face = "bold", legend.backgroundcol = "white", legend.bordercol = "white", legend.item.size = NULL, legend.item.backcol = "grey90", legend.item.bordercol = "white") { sjtheme <- NULL # ---------------------------------------- # set defaults for geom label colors # ---------------------------------------- if (is.null(geom.label.color)) { geom.label.color <- "black" } # ---------------------------------------- # set defaults for axis text angle # ---------------------------------------- if (!is.null(axis.angle)) { axis.angle.x <- axis.angle.y <- axis.angle } else { axis.angle <- axis.angle.x } # ---------------------------------------- # set defaults for axis text color # ---------------------------------------- if (!is.null(axis.textcolor)) { axis.textcolor.x <- axis.textcolor.y <- axis.textcolor } else { if (is.null(axis.textcolor.x)) axis.textcolor <- axis.textcolor.y else axis.textcolor <- axis.textcolor.x } # ---------------------------------------- # set defaults for axis line color # ---------------------------------------- if (!is.null(axis.linecolor)) { axis.linecolor.x <- axis.linecolor.y <- axis.linecolor } else { if (is.null(axis.linecolor.x)) axis.linecolor <- axis.linecolor.y else axis.linecolor 
<- axis.linecolor.x } # ---------------------------------------- # set defaults for axis text size # ---------------------------------------- if (!is.null(axis.textsize)) { axis.textsize.x <- axis.textsize.y <- axis.textsize } else { if (is.null(axis.textsize.x)) axis.textsize <- axis.textsize.y else axis.textsize <- axis.textsize.x } # ---------------------------------------- # set defaults for grid colors # ---------------------------------------- if (!is.null(panel.gridcol)) { panel.major.gridcol <- panel.minor.gridcol <- panel.gridcol } else { if (is.null(panel.major.gridcol)) panel.gridcol <- panel.minor.gridcol else panel.gridcol <- panel.major.gridcol } # ---------------------------------------- # set defaults for panel colors # ---------------------------------------- if (!is.null(panel.col)) { panel.backcol <- panel.bordercol <- panel.col } else { if (is.null(panel.backcol)) panel.col <- panel.bordercol else panel.col <- panel.backcol } # ---------------------------------------- # set title alignment # ---------------------------------------- if (!is.null(title.align)) { if (title.align == "left" || title.align == "l") title.align <- 0 if (title.align == "right" || title.align == "r") title.align <- 1 if (title.align == "center" || title.align == "c") title.align <- 0.5 } else { title.align <- 0 } # ---------------------------------------- # set defaults for plot colors # ---------------------------------------- if (!is.null(plot.col)) { plot.backcol <- plot.bordercol <- plot.col } else { if (is.null(plot.backcol)) plot.col <- plot.bordercol else plot.col <- plot.backcol } # ---------------------------------------- # set defaults for legend pos # ---------------------------------------- if (legend.inside) { # check if character constants have been used and if so, # convert to numeric vector if (is.character(legend.pos)) { if (legend.pos == "top right") legend.pos <- c(1,1) else if (legend.pos == "bottom right") legend.pos <- c(1,0) else if (legend.pos == "bottom left") legend.pos <- c(0,0) else if (legend.pos == "top left") legend.pos <- c(0,1) if (is.null(legend.just)) legend.just <- legend.pos } } # set justification default if (is.null(legend.just)) legend.just <- "center" # ---------------------------------------- # check if theme-preset is requested # ---------------------------------------- if (!is.null(theme) && any(class(theme) == "theme") && any(class(theme) == "gg")) { theme_set(theme) } # ---------------------------------------- # else, customize theme # ---------------------------------------- else if (!is.null(base) && any(class(base) == "theme") && any(class(base) == "gg")) { sjtheme <- base + # ---------------------------------------- # set base elements that are always set # ---------------------------------------- theme(plot.title = element_text(size = rel(title.size), colour = title.color, hjust = title.align), axis.text = element_text(angle = axis.angle, size = rel(axis.textsize), colour = axis.textcolor), axis.text.x = element_text(angle = axis.angle.x, size = rel(axis.textsize.x), colour = axis.textcolor.x), axis.text.y = element_text(angle = axis.angle.y, size = rel(axis.textsize.y), colour = axis.textcolor.y), axis.title = element_text(size = rel(axis.title.size), colour = axis.title.color), legend.position = legend.pos, legend.justification = legend.just, legend.text = element_text(size = rel(legend.size), colour = legend.color), legend.title = element_text(size = rel(legend.title.size), colour = legend.title.color, face = legend.title.face), 
legend.background = element_rect(colour = legend.bordercol, fill = legend.backgroundcol)) # ---------------------------------------- # set base font for theme # ---------------------------------------- if (!is.null(theme.font)) { sjtheme <- sjtheme + theme(text = element_text(family = theme.font)) } # ---------------------------------------- # set legend items background-color # ---------------------------------------- if (!is.null(legend.item.backcol)) { sjtheme <- sjtheme + theme(legend.key = element_rect(colour = legend.item.bordercol, fill = legend.item.backcol)) } # ---------------------------------------- # set legend item size # ---------------------------------------- if (!is.null(legend.item.size)) { sjtheme <- sjtheme + theme(legend.key.size = unit(legend.item.size, "cm")) } # ---------------------------------------- # set axis line colors, if defined # ---------------------------------------- if (!is.null(axis.linecolor)) { sjtheme <- sjtheme + theme(axis.line = element_line(colour = axis.linecolor, size = axis.line.size), axis.line.x = element_line(colour = axis.linecolor.x), axis.line.y = element_line(colour = axis.linecolor.y)) } # ---------------------------------------- # set axis ticks, if defined # ---------------------------------------- if (!is.null(axis.tickscol)) { sjtheme <- sjtheme + theme(axis.ticks = element_line(colour = axis.tickscol)) } if (!is.null(axis.tickslen)) { sjtheme <- sjtheme + theme(axis.ticks.length = unit(axis.tickslen, "cm")) } if (!is.null(axis.ticksmar)) { sjtheme <- sjtheme + theme(axis.text = element_text(margin = margin(t = axis.ticksmar, unit = "cm"))) } if (!is.null(axis.ticksize.x)) { sjtheme <- sjtheme + theme(axis.ticks.x = element_line(size = axis.ticksize.x)) } if (!is.null(axis.ticksize.y)) { sjtheme <- sjtheme + theme(axis.ticks.y = element_line(size = axis.ticksize.y)) } # ---------------------------------------- # set plot colors, if defined # ---------------------------------------- if (!is.null(plot.col)) { sjtheme <- sjtheme + theme(plot.background = element_rect(colour = plot.bordercol, fill = plot.backcol)) } # ---------------------------------------- # set panel colors, if defined # ---------------------------------------- if (!is.null(panel.col)) { sjtheme <- sjtheme + theme(panel.background = element_rect(colour = panel.bordercol, fill = panel.backcol), panel.border = element_rect(colour = panel.bordercol)) } # ---------------------------------------- # set panel grids, if defined # ---------------------------------------- if (!is.null(panel.gridcol)) { sjtheme <- sjtheme + theme(panel.grid.minor = element_line(colour = panel.minor.gridcol, linetype = panel.minor.linetype), panel.grid.major = element_line(colour = panel.major.gridcol, linetype = panel.major.linetype)) } # ---------------------------------------- # set plot margins. onyl applies to pre-set themes # ---------------------------------------- if (!is.null(plot.margins)) { sjtheme <- sjtheme + theme(plot.margin = plot.margins) } # ---------------------------------------- # set title adjustments. 
only applies to # pre-set themes # ---------------------------------------- if (!is.null(plot.margins)) { sjtheme <- sjtheme + theme(plot.margin = plot.margins) } if (!is.null(title.vjust)) { sjtheme <- sjtheme + theme(plot.title = element_text(vjust = title.vjust)) } if (!is.null(axis.title.x.vjust)) { sjtheme <- sjtheme + theme(axis.title.x = element_text(vjust = axis.title.x.vjust)) } if (!is.null(axis.title.y.vjust)) { sjtheme <- sjtheme + theme(axis.title.y = element_text(vjust = axis.title.y.vjust)) } # ---------------------------------------- # panel grid colors # ---------------------------------------- if (!is.null(panel.gridcol.x)) { sjtheme <- sjtheme + theme(panel.grid.minor.x = element_line(colour = panel.gridcol.x, linetype = panel.minor.linetype), panel.grid.major.x = element_line(colour = panel.gridcol.x, linetype = panel.major.linetype)) } if (!is.null(panel.gridcol.y)) { sjtheme <- sjtheme + theme(panel.grid.minor.y = element_line(colour = panel.gridcol.y, linetype = panel.minor.linetype), panel.grid.major.y = element_line(colour = panel.gridcol.y, linetype = panel.major.linetype)) } # ---------------------------------------- # finally, set theme # ---------------------------------------- theme_set(sjtheme) } else { warning("Either `theme` or `base` must be supplied as ggplot-theme-object to set global theme options for sjPlot.", call. = F) } # ---------------------------------------- # set defaults for geoms # ---------------------------------------- # if (is.null(geom.colors)) geom.colors <- diverge_hcl(9) # sj.theme_scales(geom.colors) sj.theme_geoms(geom.alpha, geom.linetype, geom.outline.size, geom.outline.color, geom.boxoutline.size, geom.boxoutline.color, geom.errorbar.size, geom.errorbar.linetype, geom.label.size, geom.label.color, geom.label.alpha, geom.label.angle) # return custom theme object invisible(sjtheme) } sj.theme_geoms <- function(geom.alpha, geom.linetype, geom.outline.size, geom.outline.color, geom.boxoutline.size, geom.boxoutline.color, geom.errorbar.size, geom.errorbar.linetype, geom.label.size, geom.label.color, geom.label.alpha, geom.label.angle) { # ---------------------------------------- # helper function to customize geoms # ---------------------------------------- updateGeoms <- function(geoms, parameters) { for (geom in geoms) update_geom_defaults(geom, parameters) } # Geoms that only require a default colour. updateGeoms(c('abline', 'point', 'density', 'errorbar', 'errorbarh', 'hline', 'line', 'area', 'tile', 'dotplot', 'bar'), list(alpha = geom.alpha)) update_geom_defaults('text', list(size = geom.label.size, colour = geom.label.color, alpha = geom.label.alpha, angle = geom.label.angle)) # Special geoms. 
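# A brief, hedged illustration of the mechanism used below:
# ggplot2::update_geom_defaults() changes the geom defaults *globally* for the
# current R session, so settings made here persist for later plots, e.g.
#   ggplot2::update_geom_defaults("bar", list(alpha = 0.7))
# The original defaults can be restored by setting them again explicitly or by
# restarting the session.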
update_geom_defaults('boxplot', list(size = geom.boxoutline.size, colour = geom.boxoutline.color, alpha = geom.alpha)) update_geom_defaults('bar', list(colour = geom.outline.color, size = geom.outline.size)) update_geom_defaults('line', list(linetype = geom.linetype)) updateGeoms(c('errorbar', 'errorbarh'), list(size = geom.errorbar.size, linetype = geom.errorbar.linetype)) } sj.setGeomColors <- function(plot, geom.colors, pal.len, show.legend = TRUE, labels = NULL, reverse.colors = FALSE) { # --------------------------------------------------------- # check for themr options # --------------------------------------------------------- if (!is.null(geom.colors) && geom.colors[1] == "themr") { return(plot) } labels <- factor(unname(labels), levels = labels) # --------------------------------------------------------- # dummy function for setting legend labels and geom-colors # --------------------------------------------------------- usenormalscale <- function(plot, geom.colors, labels, bw.figure, ltypes) { if (!show.legend) { plot <- plot + scale_fill_manual(values = geom.colors, guide = FALSE) + scale_colour_manual(values = geom.colors, guide = FALSE) + scale_linetype_manual(values = ltypes, guide = FALSE) + guides(fill = "none", colour = "none", text = "none", linetype = "none") } else { plot <- plot + scale_fill_manual(values = geom.colors, labels = labels) + scale_colour_manual(values = geom.colors, labels = labels) + scale_linetype_manual(values = ltypes) # for b/w figures, add linetype scale if (bw.figure) { plot <- plot + guides(text = "none", colour = "none") } else { plot <- plot + guides(text = "none", linetype = "none") } } plot } # --------------------------------------------------------- # dummy function for only setting legend labels, but no # geom-colors # --------------------------------------------------------- uselegendscale <- function(plot, labels, bw.figure, ltypes) { if (!show.legend) { plot <- plot + scale_fill_discrete(guide = FALSE) + scale_colour_discrete(guide = FALSE) + scale_linetype_manual(values = ltypes, guide = FALSE) + guides(fill = "none", colour = "none", text = "none", linetype = "none") } else { plot <- plot + scale_fill_discrete(labels = labels) + scale_colour_discrete(labels = labels) + scale_linetype_manual(values = ltypes) # for b/w figures, add linetype scale if (bw.figure) { plot <- plot + guides(text = "none", colour = "none") } else { plot <- plot + guides(text = "none", linetype = "none") } } plot } # check if we have coloured plot or b/w figure with different linetypes bw.figure <- !is.null(geom.colors) && geom.colors[1] == "bw" if (bw.figure) ltypes <- seq_len(pal.len) else ltypes <- rep(1, times = pal.len) # --------------------------------------------------------- # set geom colors # --------------------------------------------------------- if (!is.null(geom.colors)) { # brewer palette? if (is.brewer.pal(geom.colors[1])) { if (length(geom.colors) > 1) { neutral.color <- geom.colors[2] pal.len <- pal.len - 1 } else { neutral.color <- NULL } geom.colors <- scales::brewer_pal(palette = geom.colors[1])(pal.len) if (reverse.colors) geom.colors <- rev(geom.colors) if (!is.null(neutral.color)) geom.colors <- c(geom.colors, neutral.color) } else if (geom.colors[1] == "gs") { geom.colors <- scales::grey_pal()(pal.len) if (reverse.colors) geom.colors <- rev(geom.colors) } else if (geom.colors[1] == "bw") { geom.colors <- rep("black", times = pal.len) } else if (length(geom.colors) > pal.len) { warning("More colors provided than needed. 
Shortening color palette.") geom.colors <- geom.colors[1:pal.len] if (reverse.colors) geom.colors <- rev(geom.colors) } if (length(geom.colors) < pal.len) { warning("Too less colors provided for plot. Using default color palette.") plot <- uselegendscale(plot, labels, bw.figure, ltypes) } else { plot <- usenormalscale(plot, geom.colors, labels, bw.figure, ltypes) } } else { plot <- uselegendscale(plot, labels, bw.figure, ltypes) } plot } sjPlot/R/tab_xtab.R0000644000176200001440000007035614150126464013653 0ustar liggesusers#' @title Summary of contingency tables as HTML table #' @name tab_xtab #' #' @description Shows contingency tables as HTML file in browser or viewer pane, or saves them as file. #' #' @param var.row Variable that should be displayed in the table rows. #' @param var.col Cariable that should be displayed in the table columns. #' @param var.labels Character vector with variable names, which will be used #' to label variables in the output. #' @param string.total Character label for the total column / row header #' @param show.cell.prc Logical, if \code{TRUE}, cell percentage values are shown #' @param show.row.prc Logical, if \code{TRUE}, row percentage values are shown #' @param show.col.prc Logical, if \code{TRUE}, column percentage values are shown #' @param show.obs Logical, if \code{TRUE}, observed values are shown #' @param show.exp Logical, if \code{TRUE}, expected values are also shown #' @param show.summary Logical, if \code{TRUE}, a summary row with #' chi-squared statistics, degrees of freedom and Cramer's V or Phi #' coefficient and p-value for the chi-squared statistics. #' @param tdcol.n Color for highlighting count (observed) values in table cells. Default is black. #' @param tdcol.expected Color for highlighting expected values in table cells. Default is cyan. #' @param tdcol.cell Color for highlighting cell percentage values in table cells. Default is red. #' @param tdcol.row Color for highlighting row percentage values in table cells. Default is blue. #' @param tdcol.col Color for highlighting column percentage values in table cells. Default is green. #' @param emph.total Logical, if \code{TRUE}, the total column and row will be emphasized with a #' different background color. See \code{emph.color}. #' @param emph.color Logical, if \code{emph.total = TRUE}, this color value will be used #' for painting the background of the total column and row. Default is a light grey. #' @param prc.sign The percentage sign that is printed in the table cells, in HTML-format. #' Default is \code{" \%"}, hence the percentage sign has a non-breaking-space after #' the percentage value. #' @param hundret Default value that indicates the 100-percent column-sums (since rounding values #' may lead to non-exact results). Default is \code{"100.0"}. #' @param statistics Name of measure of association that should be computed. May #' be one of \code{"auto"}, \code{"cramer"}, \code{"phi"}, \code{"spearman"}, #' \code{"kendall"}, \code{"pearson"} or \code{"fisher"}. See #' \code{\link[sjstats]{xtab_statistics}}. #' @param ... Other arguments, currently passed down to the test statistics functions #' \code{chisq.test()} or \code{fisher.test()}. #' @param encoding String, indicating the charset encoding used for variable and #' value labels. Default is \code{NULL}, so encoding will be auto-detected #' depending on your platform (e.g., \code{"UTF-8"} for Unix and \code{"Windows-1252"} for #' Windows OS). Change encoding if specific chars are not properly displayed (e.g. German umlauts). 
#' @param remove.spaces Logical, if \code{TRUE}, leading spaces are removed from all lines in the final string #' that contains the html-data. Use this, if you want to remove parantheses for html-tags. The html-source #' may look less pretty, but it may help when exporting html-tables to office tools. #' @param value.labels Character vector (or \code{list} of character vectors) #' with value labels of the supplied variables, which will be used #' to label variable values in the output. #' #' @inheritParams tab_model #' @inheritParams plot_grpfrq #' #' @return Invisibly returns #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the web page content (\code{page.content}), #' \item the complete html-output (\code{page.complete}) and #' \item the html-table with inline-css for use with knitr (\code{knitr}) #' } #' for further use. #' #' @examples #' # prepare sample data set #' data(efc) #' #' # print simple cross table with labels #' \dontrun{ #' if (interactive()) { #' tab_xtab(efc$e16sex, efc$e42dep) #' #' # print cross table with manually set #' # labels and expected values #' tab_xtab( #' efc$e16sex, #' efc$e42dep, #' var.labels = c("Elder's gender", "Elder's dependency"), #' show.exp = TRUE #' ) #' #' # print minimal cross table with labels, total col/row highlighted #' tab_xtab(efc$e16sex, efc$e42dep, show.cell.prc = FALSE, emph.total = TRUE) #' #' # User defined style sheet #' tab_xtab(efc$e16sex, efc$e42dep, #' CSS = list(css.table = "border: 2px solid;", #' css.tdata = "border: 1px solid;", #' css.horline = "border-bottom: double blue;")) #' #' # ordinal data, use Kendall's tau #' tab_xtab(efc$e42dep, efc$quol_5, statistics = "kendall") #' #' # calculate Spearman's rho, with continuity correction #' tab_xtab( #' efc$e42dep, #' efc$quol_5, #' statistics = "spearman", #' exact = FALSE, #' continuity = TRUE #' ) #' } #' } #' @importFrom stats ftable #' @importFrom sjstats crosstable_statistics table_values #' @export tab_xtab <- function(var.row, var.col, weight.by = NULL, title = NULL, var.labels = NULL, value.labels = NULL, wrap.labels = 20, show.obs = TRUE, show.cell.prc = FALSE, show.row.prc = FALSE, show.col.prc = FALSE, show.exp = FALSE, show.legend = FALSE, show.na = FALSE, show.summary = TRUE, drop.empty = TRUE, statistics = c("auto", "cramer", "phi", "spearman", "kendall", "pearson", "fisher"), string.total = "Total", digits = 1, tdcol.n = "black", tdcol.expected = "#339999", tdcol.cell = "#993333", tdcol.row = "#333399", tdcol.col = "#339933", emph.total = FALSE, emph.color = "#f8f8f8", prc.sign = " %", hundret = "100.0", CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE, ...) 
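# A hedged usage sketch (not part of the documented examples above): a weighted
# cross table with column percentages and Cramer's V, assuming the `efc` example
# data and a hypothetical weight vector `w` of matching length:
#   w <- abs(rnorm(nrow(efc), mean = 1, sd = 0.3))
#   tab_xtab(efc$e16sex, efc$e42dep, weight.by = w,
#            show.col.prc = TRUE, show.summary = TRUE, statistics = "cramer")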
{ # -------------------------------------------------------- # check p-value-style option # -------------------------------------------------------- opt <- getOption("p_zero") if (is.null(opt) || opt == FALSE) { p_zero <- "" } else { p_zero <- "0" } # ------------------------------------- # check encoding # ------------------------------------- encoding <- get.encoding(encoding) # match arguments statistics <- match.arg(statistics) # -------------------------------------------------------- # get variable name # -------------------------------------------------------- var.name.row <- get_var_name(deparse(substitute(var.row))) var.name.col <- get_var_name(deparse(substitute(var.col))) # remove empty value-labels if (drop.empty) { var.row <- sjlabelled::drop_labels(var.row) var.col <- sjlabelled::drop_labels(var.col) } # -------------------------------------------------------- # create cross table of frequencies and percentages # -------------------------------------------------------- mydat <- create.xtab.df(var.row, var.col, round.prz = digits, na.rm = !show.na, weight.by = weight.by) # -------------------------------------------------------- # try to automatically set labels is not passed as parameter # -------------------------------------------------------- if (is.null(var.labels)) { var.labels <- c(sjlabelled::get_label(var.row, def.value = var.name.row), sjlabelled::get_label(var.col, def.value = var.name.col)) } # wrap long labels var.labels <- sjmisc::word_wrap(var.labels, wrap.labels, "
") s.var.row <- var.labels[1] s.var.col <- var.labels[2] # -------------------------------------------------------- # Do we have notes for vectors? # -------------------------------------------------------- n.var.row <- comment(var.row) n.var.col <- comment(var.col) # ------------------------------------- # init variable labels # ------------------------------------- labels.var.row <- mydat$labels.cnt labels.var.col <- mydat$labels.grp # do we have labels? if (!is.null(value.labels)) { # need to be a list if (!is.list(value.labels)) { warning("`valueLables` needs to be a `list`-object.", call. = F) } else { labels.var.row <- value.labels[[1]] labels.var.col <- value.labels[[2]] } # correct length of labels? if (length(labels.var.row) != length(mydat$labels.cnt)) { warning("Length of `value.labels` does not match length of category values of `var.row`.", call. = F) labels.var.row <- mydat$labels.cnt } # correct length of labels? if (length(labels.var.col) != length(mydat$labels.grp)) { warning("Length of `value.labels` does not match length of category values of `var.grp`.", call. = F) labels.var.col <- mydat$labels.grp } } # wrap labels labels.var.row <- sjmisc::word_wrap(labels.var.row, wrap.labels, "
") labels.var.col <- sjmisc::word_wrap(labels.var.col, wrap.labels, "
") # add "total" labels.var.row <- c(labels.var.row, string.total) labels.var.col <- c(labels.var.col) # ------------------------------------- # compute table counts and percentages # ------------------------------------- tab <- mydat$mydat[, -1] tab$total <- unname(rowSums(tab)) tab <- rbind(tab, unname(colSums(tab))) tab.cell <- mydat$proptab.cell tab.row <- mydat$proptab.row tab.row$total <- 100 tab.col <- mydat$proptab.col tab.col <- rbind(tab.col, rep(100, times = ncol(tab.col))) tab.expected <- sjstats::table_values(stats::ftable(as.matrix(tab)))$expected # ------------------------------------- # determine total number of columns and rows # ------------------------------------- totalncol <- ncol(tab) totalnrow <- nrow(tab) # ------------------------------------- # table init # ------------------------------------- # init web page header toWrite <- table.header <- sprintf("\n\n\n", encoding) # ------------------------------------- # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value # ------------------------------------- tag.table <- "table" tag.caption <- "caption" tag.thead <- "thead" tag.tdata <- "tdata" tag.firstcolborder <- "firstcolborder" tag.secondtablerow <- "secondtablerow" tag.leftalign <- "leftalign" tag.centeralign <- "centeralign" tag.lasttablerow <- "lasttablerow" tag.totcol <- "totcol" tag.tothi <- "tothi" tag.summary <- "summary" tag.horline <- "horline" tag.td_ex <- "td_ex" tag.td_cl <- "td_cl" tag.td_rw <- "td_rw" tag.td_c <- "td_c" tag.td_n <- "td_n" css.table <- "border-collapse:collapse; border:none;" css.caption <- "font-weight: bold; text-align:left;" css.thead <- "border-top:double; text-align:center; font-style:italic; font-weight:normal;" css.tdata <- "padding:0.2cm;" css.firstcolborder <- "border-bottom:1px solid;" css.secondtablerow <- "border-bottom:1px solid; text-align:center;" css.leftalign <- "text-align:left; vertical-align:middle;" css.centeralign <- "text-align:center;" css.lasttablerow <- ifelse(isTRUE(emph.total), sprintf(" border-bottom:double; background-color:%s;", emph.color), " border-bottom:double;") css.totcol <- ifelse(isTRUE(emph.total), sprintf(" background-color:%s;", emph.color), "") css.tothi <- "font-weight:bolder; font-style:italic;" css.summary <- "text-align:right; font-size:0.9em; font-style:italic;" css.horline <- "" # ------------------------ # check user defined style sheets # ------------------------ if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.table']], 2)), CSS[['css.table']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']], 1, 1) == '+', paste0(css.caption, substring(CSS[['css.caption']], 2)), CSS[['css.caption']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']], 1, 1) == '+', paste0(css.thead, substring(CSS[['css.thead']], 2)), CSS[['css.thead']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']], 1, 1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']], 2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.summary']])) css.summary <- ifelse(substring(CSS[['css.summary']], 1, 1) == '+', paste0(css.summary, substring(CSS[['css.summary']], 2)), CSS[['css.summary']]) if (!is.null(CSS[['css.leftalign']])) css.leftalign <- ifelse(substring(CSS[['css.leftalign']], 1, 1) == '+', paste0(css.leftalign, substring(CSS[['css.leftalign']], 
2)), CSS[['css.leftalign']]) if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']], 1, 1) == '+', paste0(css.centeralign, substring(CSS[['css.centeralign']], 2)), CSS[['css.centeralign']]) if (!is.null(CSS[['css.lasttablerow']])) css.lasttablerow <- ifelse(substring(CSS[['css.lasttablerow']], 1, 1) == '+', paste0(css.lasttablerow, substring(CSS[['css.lasttablerow']], 2)), CSS[['css.lasttablerow']]) if (!is.null(CSS[['css.firstcolborder']])) css.firstcolborder <- ifelse(substring(CSS[['css.firstcolborder']], 1, 1) == '+', paste0(css.firstcolborder, substring(CSS[['css.firstcolborder']], 2)), CSS[['css.firstcolborder']]) if (!is.null(CSS[['css.secondtablerow']])) css.secondtablerow <- ifelse(substring(CSS[['css.secondtablerow']], 1, 1) == '+', paste0(css.secondtablerow, substring(CSS[['css.secondtablerow']], 2)), CSS[['css.secondtablerow']]) if (!is.null(CSS[['css.totcol']])) css.totcol <- ifelse(substring(CSS[['css.totcol']], 1, 1) == '+', paste0(css.totcol, substring(CSS[['css.totcol']], 2)), CSS[['css.totcol']]) if (!is.null(CSS[['css.tothi']])) css.tothi <- ifelse(substring(CSS[['css.tothi']], 1, 1) == '+', paste0(css.tothi, substring(CSS[['css.tothi']], 2)), CSS[['css.tothi']]) if (!is.null(CSS[['css.horline']])) css.horline <- ifelse(substring(CSS[['css.horline']], 1, 1) == '+', paste0(css.horline, substring(CSS[['css.horline']], 2)), CSS[['css.horline']]) } # ------------------------------------- # set style sheet # ------------------------------------- page.style <- sprintf("", tag.table, css.table, tag.caption, css.caption, tag.thead, css.thead, tag.tdata, css.tdata, tag.secondtablerow, css.secondtablerow, tag.leftalign, css.leftalign, tag.centeralign, css.centeralign, tag.lasttablerow, css.lasttablerow, tag.totcol, css.totcol, tag.tothi, css.tothi, tag.td_n, tdcol.n, tag.td_c, tdcol.cell, tag.td_rw, tdcol.row, tag.td_cl, tdcol.col, tag.td_ex, tdcol.expected, tag.summary, css.summary, tag.horline, css.horline, tag.firstcolborder, css.firstcolborder) # start writing content toWrite <- paste(toWrite, page.style) toWrite <- paste(toWrite, "\n\n\n") # ------------------------------------- # init first table row # ------------------------------------- page.content <- "\n" # ------------------------------------- # table caption, variable label # ------------------------------------- if (!is.null(title)) page.content <- paste0(page.content, sprintf(" \n", title)) page.content <- paste(page.content, " \n") # ------------------------------------- # column with row-variable-name # ------------------------------------- if (!is.null(n.var.row)) th.title.tag <- sprintf(" title=\"%s\"", n.var.row) else th.title.tag <- "" page.content <- paste(page.content, sprintf(" \n", th.title.tag, s.var.row)) # ------------------------------------- # column with column-variable-name # ------------------------------------- if (!is.null(n.var.col)) th.title.tag <- sprintf(" title=\"%s\"", n.var.col) else th.title.tag <- "" page.content <- paste(page.content, sprintf(" \n", length(labels.var.col), th.title.tag, s.var.col)) # ------------------------------------- # total-column # ------------------------------------- page.content <- paste(page.content, sprintf(" \n", string.total)) page.content <- paste(page.content, " \n") # ------------------------------------- # init second table row # ------------------------------------- page.content <- paste(page.content, "\n \n") # ------------------------------------- # column variable labels # 
------------------------------------- for (i in seq_along(labels.var.col)) { page.content <- paste(page.content, sprintf(" \n", labels.var.col[i])) } page.content <- paste(page.content, " \n") # ------------------------------------- # table content # ------------------------------------- rowlabelcnt <- seq_len(length(labels.var.row)) # ------------------------------------- # iterate all table data rows # ------------------------------------- for (irow in seq_len(totalnrow)) { # ------------------------------------- # start new table row # ------------------------------------- page.content <- paste(page.content, "\n ") # ------------------------------------- # set row variable label # ------------------------------------- if (irow == totalnrow) { css_last_row_th <- "lasttablerow tothi " css_last_row <- " lasttablerow" } else { css_last_row_th <- " " css_last_row <- "" } page.content <- paste(page.content, sprintf("\n ", css_last_row_th, labels.var.row[rowlabelcnt[irow]])) # ------------------------------------- # iterate all data columns # ------------------------------------- for (icol in seq_len(totalncol)) { cellstring <- "" # ------------------------------------- # first table cell data contains observed values # ------------------------------------- if (show.obs) cellstring <- sprintf("%i", tab[irow, icol]) # ------------------------------------- # if we have expected values, add them to table cell # ------------------------------------- if (show.exp) { if (!sjmisc::is_empty(cellstring)) cellstring <- paste0(cellstring, "
") cellstring <- paste(cellstring, sprintf("%s", tab.expected[irow, icol]), sep = "") } # ------------------------------------- # if we have row-percentage, add percentage value to table cell # ------------------------------------- if (show.row.prc) { if (!sjmisc::is_empty(cellstring)) cellstring <- paste0(cellstring, "
") cellstring <- paste(cellstring, sprintf("%s%s", tab.row[irow, icol], prc.sign), sep = "") } # ------------------------------------- # if we have col-percentage, add percentage value to table cell # ------------------------------------- if (show.col.prc) { if (!sjmisc::is_empty(cellstring)) cellstring <- paste0(cellstring, "
") cellstring <- paste(cellstring, sprintf("%s%s", tab.col[irow, icol], prc.sign), sep = "") } # ------------------------------------- # if we have cell-percentage, add percentage value to table cell # ------------------------------------- if (show.cell.prc) { if (!sjmisc::is_empty(cellstring)) cellstring <- paste0(cellstring, "
") cellstring <- paste(cellstring, sprintf("%s%s", tab.cell[irow, icol], prc.sign), sep = "") } # ------------------------------------- # set column variable label # ------------------------------------- css_tot_col <- ifelse(icol == totalncol, " totcol", "") # ------------------------------------- # write table cell data # ------------------------------------- page.content <- paste(page.content, sprintf("\n ", ifelse(css_last_row == "", css_tot_col, css_last_row), cellstring), sep = "") } # close table row page.content <- paste(page.content, "\n \n") } # ------------------------------------- # table summary # ------------------------------------- if (show.summary) { xtsdf <- data.frame(var.row, var.col) if (!is.null(weight.by)) { xtsdf$weights <- weight.by xt_stat <- sjstats::crosstable_statistics( data = xtsdf, weights = "weights", statistics = statistics, ... ) } else { xt_stat <- sjstats::crosstable_statistics( data = xtsdf, statistics = statistics, ... ) } # fisher's exact test? if (xt_stat$fisher) pstring <- "Fisher's p" else pstring <- "p" page.content <- paste( page.content, sprintf( " ", totalncol + 1, xt_stat$stat.html, xt_stat$statistic, xt_stat$df, xt_stat$method.html, xt_stat$estimate, pstring, xt_stat$p.value ), sep = "" ) # close table row page.content <- paste(page.content, "\n \n") } # ------------------------------------- # finish table # ------------------------------------- page.content <- paste(page.content, "\n
%s
%s%s%s
%s
%s%s
%s=%.3f · df=%i · %s=%.3f · %s=%.3f
") # ------------------------------------- # print legend # ------------------------------------- if (show.legend) { # add new paragraph page.content <- paste(page.content, "

\n ") # ----------------- # show observed? # ----------------- if (show.obs) { page.content <- paste(page.content, "observed values
\n") } # ----------------- # show expected? # ----------------- if (show.exp) { page.content <- paste(page.content, "expected values
\n") } # ----------------- # show row percentage? # ----------------- if (show.row.prc) { page.content <- paste(page.content, sprintf("% within %s
\n", gsub("
", " ", s.var.row, fixed = TRUE))) } # ----------------- # show row percentage? # ----------------- if (show.col.prc) { page.content <- paste(page.content, sprintf("% within %s
\n", gsub("
", " ", s.var.col, fixed = TRUE))) } # ----------------- # show row percentage? # ----------------- if (show.cell.prc) { page.content <- paste(page.content, "% of total\n") } # close paragraph page.content <- paste(page.content, "

\n") } # ------------------------------------- # add table to return value list, so user can access each # single frequency table # ------------------------------------- toWrite <- paste(toWrite, page.content, "\n") # ------------------------------------- # finish html page # ------------------------------------- toWrite <- paste0(toWrite, "") # ------------------------------------- # replace class attributes with inline style, # useful for knitr # ------------------------------------- # copy page content # ------------------------------------- knitr <- page.content # ------------------------------------- # set style attributes for main table tags # ------------------------------------- knitr <- gsub("class=", "style=", knitr, fixed = TRUE, useBytes = TRUE) knitr <- gsub(" 1) { tnames <- names(title) if (obj_has_name(dat, "facet") && !is.null(tnames)) { if (all(tnames %in% dat$facet)) { for (i in tnames) { dat$facet[which(dat$facet == i)] <- title[i] } title <- "" } } if (obj_has_name(dat, "response.level") && !is.null(tnames)) { if (all(tnames %in% dat$response.level)) { for (i in tnames) { dat$response.level[which(dat$response.level == i)] <- title[i] } title <- "" } } } # se needs to be logical from here on if (!is.null(se) && !is.logical(se)) se <- TRUE # for stan-models, we can define the style of the Bayesian point estimate, # which may be a line or a dot. if (missing(bpe.style) || is.null(bpe.style)) bpe.style <- "line" if (missing(value.size) || is.null(value.size)) value.size <- 4 plot_model_estimates( model = model, dat = dat, tf = tf, se = se, terms = terms, group.terms = group.terms, rm.terms = rm.terms, sort.est = sort.est, title = title, axis.title = axis.title, axis.labels = axis.labels, axis.lim = axis.lim, grid.breaks = grid.breaks, show.intercept = show.intercept, show.values = show.values, show.p = show.p, value.offset = value.offset, digits = digits, geom.colors = geom.colors, geom.size = geom.size, line.size = line.size, bpe.style = bpe.style, bpe.color = bpe.color, term.order = order.terms, vline.color = vline.color, value.size = value.size, facets = facets, p.threshold = p.threshold, ci.style = ci.style, ... ) } sjPlot/R/tidiers.R0000644000176200001440000003441514136575051013532 0ustar liggesusers#' @importFrom stats qnorm pnorm #' @importFrom effectsize standardize #' @importFrom parameters model_parameters #' @importFrom insight standardize_names tidy_model <- function( model, ci.lvl, tf, type, bpe, robust, facets, show.zeroinf, p.val, standardize = FALSE, bootstrap = FALSE, iterations = 1000, seed = NULL, p_adjust = NULL, keep = NULL, drop = NULL, ...) { if (!is.logical(standardize) && standardize == "") standardize <- NULL if (is.logical(standardize) && standardize == FALSE) standardize <- NULL if (is.stan(model)) { out <- tidy_stan_model(model, ci.lvl, tf, type, bpe, show.zeroinf, facets, ...) 
} else { if (!is.null(standardize)) { if (isTRUE(standardize)) standardize <- "std" model <- effectsize::standardize(model, two_sd = isTRUE(standardize == "std2")) } if (!is.null(seed)) { set.seed(seed) } component <- ifelse(show.zeroinf & insight::model_info(model)$is_zero_inflated, "all", "conditional") if (is.null(p.val)) { if (inherits(model, c("glm", "polr"))) { p.val <- "profile" } else { p.val <- "wald" } } ci_method <- switch( p.val, "r" = , "residual" = "residual", "wald" = "wald", "kr" = , "kenward" = "kenward", "s" = , "satterthwaite" = "satterthwaite", "n" = , "normal" = "normal", "profile" = "profile", p.val ) if (!is.null(robust) && !is.null(robust$vcov.fun)) { if (grepl("^vcov", robust$vcov.fun)) { robust$vcov.fun <- sub("^vcov", "", robust$vcov.fun) } model_params <- parameters::model_parameters(model, ci = ci.lvl, component = component, bootstrap = bootstrap, iterations = iterations, robust = TRUE, vcov_estimation = robust$vcov.fun, vcov_type = robust$vcov.type, vcov_args = robust$vcov.args, ci_method = ci_method, p_adjust = p_adjust, effects = "fixed", keep = keep, drop = drop) } else { model_params <- parameters::model_parameters(model, ci = ci.lvl, component = component, bootstrap = bootstrap, iterations = iterations, ci_method = ci_method, p_adjust = p_adjust, effects = "fixed", keep = keep, drop = drop) } out <- insight::standardize_names(model_params, style = "broom") # warning for p-values? tryCatch({ if (insight::model_info(model)$is_mixed && ci_method == "kenward" && insight::find_algorithm(model)$algorithm != "REML") { warning("Model was not fitted by REML. Re-fitting model using REML, but p-values, df, etc. still might be unreliable.", call. = FALSE) } }, error = function(e) { NULL } ) column <- which(colnames(out) == "response") if (length(column)) colnames(out)[column] <- ifelse(isTRUE(facets), "facet", "response.level") column <- which(colnames(out) == "component") if (length(column)) colnames(out)[column] <- "wrap.facet" if (!is.null(out$effect) && "random" %in% out$effect) { out$group <- NULL } column <- which(colnames(out) == "group") if (length(column)) colnames(out)[column] <- "wrap.facet" # remove duplicated column names dupl_cols <- duplicated(colnames(out)) if (any(dupl_cols)) { out <- out[!dupl_cols] } if ("component" %in% colnames(out)) { out$component[out$component == "zero_inflated"] <- "Zero-Inflated Model" out$component[out$component == "zi"] <- "Zero-Inflated Model" out$component[out$component == "conditional"] <- "Conditional Model" out$component[out$component == "count"] <- "Conditional Model" } attr(out, "pretty_names") <- attributes(model_params)$pretty_names } out } #' @importFrom stats mad formula #' @importFrom bayestestR ci #' @importFrom insight is_multivariate model_info #' @importFrom sjmisc var_rename add_columns is_empty typical_value #' @importFrom dplyr select filter slice inner_join n_distinct #' @importFrom purrr map_dbl #' @importFrom rlang .data tidy_stan_model <- function(model, ci.lvl, tf, type, bpe, show.zeroinf, facets, ...) { # set defaults p.inner <- .5 p.outer <- ci.lvl # get model information modfam <- insight::model_info(model) if (insight::is_multivariate(model)) modfam <- modfam[[1]] # additional arguments for 'effects()'-function? 
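  # Hedged sketch: these dot-arguments are what a user forwards through
  # plot_model(); e.g. for a hypothetical Stan/brms fit `m_stan`,
  #   plot_model(m_stan, prob.inner = 0.5, prob.outer = 0.95)
  # would set the widths of the inner and outer credible intervals computed
  # below with bayestestR::ci().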
add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) # check whether we have "prob.inner" and "prob.outer" argument # and if so, use these for CI and Bayesian point estimate if ("prob.inner" %in% names(add.args)) p.inner <- eval(add.args[["prob.inner"]]) if ("prob.outer" %in% names(add.args)) p.outer <- eval(add.args[["prob.outer"]]) # get two CI-intervals if (type == "re") ty <- "random" else ty <- "fixed" d1 <- bayestestR::ci(model, ci = p.outer, effects = ty, component = "all") d2 <- bayestestR::ci(model, ci = p.inner, effects = ty, component = "all") if (!is.null(tf)) { funtrans <- match.fun(tf) d1$CI_low <- funtrans(d1$CI_low) d1$CI_high <- funtrans(d1$CI_high) d2$CI_low <- funtrans(d2$CI_low) d2$CI_high <- funtrans(d2$CI_high) } # bind columns, so we have inner and outer hdi interval dat <- d2 %>% dplyr::select(.data$CI_low, .data$CI_high) %>% sjmisc::var_rename(CI_low = "conf.low50", CI_high = "conf.high50") %>% sjmisc::add_columns(d1) %>% sjmisc::var_rename(CI_low = "conf.low", CI_high = "conf.high", Parameter = "term") %>% dplyr::select(-.data$CI, -.data$Effects, -.data$Component) # for brmsfit models, we need to remove some columns here to # match data rows later mod.dat <- as.data.frame(model, optional = FALSE) if (inherits(model, "brmsfit")) { re.sd <- string_starts_with("sd_", x = colnames(mod.dat)) re.cor <- string_starts_with("cor_", x = colnames(mod.dat)) lp <- string_starts_with("lp__", x = colnames(mod.dat)) resp.cor <- string_starts_with("rescor__", x = colnames(mod.dat)) priors <- string_starts_with("prior_", x = colnames(mod.dat)) xme <- string_starts_with(pattern = "Xme_me", x = colnames(mod.dat)) xme.sd <- string_starts_with(pattern = "sdme_me", x = colnames(mod.dat)) brmsfit.removers <- unique(c(re.sd, re.cor, lp, resp.cor, priors, xme, xme.sd)) if (!sjmisc::is_empty(brmsfit.removers)) mod.dat <- dplyr::select(mod.dat, !! -brmsfit.removers) # also clean prepared data frame resp.cor <- string_starts_with("rescor__", x = dat$term) if (!sjmisc::is_empty(resp.cor)) dat <- dplyr::slice(dat, !! -resp.cor) } # do transformation on posterior samples first, # then summarize (see https://discourse.mc-stan.org/t/monotonic-effects-in-non-gaussian-models/6353/5) # need to transform point estimate as well if (!is.null(tf)) { funtrans <- match.fun(tf) all.cols <- sjmisc::seq_col(mod.dat) simp.pars <- string_starts_with("simo_mo", colnames(mod.dat)) if (!sjmisc::is_empty(simp.pars)) all.cols <- all.cols[-simp.pars] for (i in all.cols) mod.dat[[i]] <- funtrans(mod.dat[[i]]) } # add bayesian point estimate est <- purrr::map_dbl(mod.dat, ~ sjmisc::typical_value(.x, fun = bpe)) dat <- data_frame( term = names(est), estimate = est, p.value = 0, std.error = purrr::map_dbl(mod.dat, stats::mad) ) %>% dplyr::inner_join( dat, by = "term" ) # sort columns, for tab_model() sorted_columns <- intersect( c("term", "estimate", "std.error", "conf.low", "conf.high", "conf.low50", "conf.high50", "p.value"), colnames(dat) ) dat <- dat[, sorted_columns] # remove some of the information not needed for plotting if ("sigma" %in% dat$term) dat <- dplyr::filter(dat, .data$term != "sigma") if ("lp__" %in% dat$term) dat <- dplyr::filter(dat, .data$term != "lp__") if ("shape" %in% dat$term) dat <- dplyr::filter(dat, .data$term != "shape") # remove sd_c and cor_ row re <- string_starts_with("sd_", x = dat$term) if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! -re) re <- string_starts_with("cor_", x = dat$term) if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! 
-re) # check if we need to keep or remove random effects re <- string_starts_with("b[", x = dat$term) re.s <- string_starts_with("Sigma[", x = dat$term) re.i <- intersect( string_starts_with("r_", x = dat$term), string_ends_with(".", x = dat$term) ) # and all random effect error terms if (!sjmisc::is_empty(re.s)) dat <- dplyr::slice(dat, !! -re.s) if (type == "est") { # remove all random effect intercepts if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! -re) # remove random effects from brmsfit-models if (!sjmisc::is_empty(re.i)) dat <- dplyr::slice(dat, !! -re.i) } else if (type == "re") { # remove all random effect intercepts if (!sjmisc::is_empty(re)) dat <- dplyr::slice(dat, !! re) # remove random effects from brmsfit-models if (!sjmisc::is_empty(re.i)) dat <- dplyr::slice(dat, !! re.i) } # for plot-type random effects, make sure that the random effects # are plotted as facet grid, grouped by groups if (type == "re") { dat$facet <- "re" # find random intercepts ri <- grep("b\\[\\(Intercept\\) (.*)\\]", dat$term) if (!sjmisc::is_empty(ri)) { dat$facet[ri] <- "(Intercept)" dat$term[ri] <- gsub("b\\[\\(Intercept\\) (.*)\\]", "\\1", dat$term[ri]) } # find random intercepts ri1 <- grep("r_(.*)\\.(.*)\\.", dat$term) ri2 <- which(gsub("r_(.*)\\.(.*)\\.", "\\2", dat$term) == "Intercept") if (!sjmisc::is_empty(ri1)) { ri <- intersect(ri1, ri2) dat$facet[ri] <- "(Intercept)" dat$term[ri] <- gsub("r_(.*)\\.(.*)\\.", "\\1", dat$term[ri]) } # fix multiple random intercepts if (inherits(model, "brmsfit")) { pattern <- "(.*)\\.(.*)" } else { pattern <- "(.*)\\:(.*)" } interc <- which(dat$facet == "(Intercept)") if (!sjmisc::is_empty(interc)) { interc.grps <- gsub(pattern, "\\1", dat$term[interc]) resp.lvl <- gsub(pattern, "\\2", dat$term[interc]) if (!sjmisc::is_empty(interc.grps) && dplyr::n_distinct(interc.grps) > 1) { dat$facet[interc] <- sprintf("(Intercept: %s)", interc.grps) dat$term[interc] <- resp.lvl } } # find random slopes rs1 <- grep("b\\[(.*) (.*)\\]", dat$term) rs2 <- which(gsub("b\\[(.*) (.*)\\]", "\\1", dat$term) != "(Intercept)") if (!sjmisc::is_empty(rs1)) { rs <- intersect(rs1, rs2) rs.string <- gsub("b\\[(.*) (.*)\\]", "\\1", dat$term[rs]) dat$facet[rs] <- rs.string dat$term[rs] <- gsub("b\\[(.*) (.*)\\]", "\\2", dat$term[rs]) } # find random slopes rs1 <- grep("r_(.*)\\.(.*)\\.", dat$term) rs2 <- which(gsub("r_(.*)\\.(.*)\\.", "\\2", dat$term) != "Intercept") if (!sjmisc::is_empty(rs1)) { rs <- intersect(rs1, rs2) rs.string <- gsub("r_(.*)\\.(.*)\\.", "\\2", dat$term[rs]) dat$facet[rs] <- rs.string dat$term[rs] <- gsub("r_(.*)\\.(.*)\\.", "\\1", dat$term[rs]) } } # categorical model? if (inherits(model, "brmsfit") && modfam$is_categorical) { # terms of categorical models are prefixed with "mu" if (length(string_starts_with("b_mu", x = dat$term)) == nrow(dat)) { dat$term <- substr(dat$term, 5, max(nchar(dat$term))) # create "response-level" variable dat <- sjmisc::add_variables(dat, response.level = "", .before = 1) dat$response.level <- gsub("(.*)\\_(.*)", "\\1", dat$term) dat$term <- gsub("(.*)\\_(.*)", "\\2", dat$term) } } # multivariate-response model? if (inherits(model, "brmsfit") && insight::is_multivariate(model)) { # get response variables responses <- stats::formula(model)$responses # also clean prepared data frame resp.sigma1 <- string_starts_with("sigma_", x = dat$term) resp.sigma2 <- string_starts_with("b_sigma_", x = dat$term) resp.sigma <- c(resp.sigma1, resp.sigma2) if (!sjmisc::is_empty(resp.sigma)) dat <- dplyr::slice(dat, !! 
-resp.sigma) # create "response-level" variable dat <- sjmisc::add_variables(dat, response.level = "", .before = 1) # copy name of response into new character variable # and remove response name from term name for (i in responses) { m <- grep(pattern = sprintf("_%s_", i), x = dat$term) dat$response.level[intersect(which(dat$response.level == ""), m)] <- i dat$term <- gsub(sprintf("b_%s_", i), "", dat$term, fixed = TRUE) dat$term <- gsub(sprintf("s_%s_", i), "", dat$term, fixed = TRUE) } # check whether each category should be printed in facets, or # in a single graph (with dodged geoms) if (!missing(facets) && isTRUE(facets)) colnames(dat)[1] <- "facet" else colnames(dat)[1] <- "response.level" } # do we have a zero-inflation model? if (modfam$is_zero_inflated || sjmisc::str_contains(dat$term, "b_zi_", ignore.case = T)) { dat$wrap.facet <- "Conditional Model" # zero-inflated part zi <- string_starts_with("b_zi_", x = dat$term) # check if zero-inflated part should be shown or removed if (show.zeroinf) { dat$wrap.facet[zi] <- "Zero-Inflated Model" dat$term[zi] <- sub(pattern = "b_zi_", replacement = "b_", x = dat$term[zi], fixed = T) } else { if (!sjmisc::is_empty(zi)) dat <- dplyr::slice(dat, !! -zi) } } # check model for monotonic effects simplex.terms <- string_starts_with(pattern = "simo_mo", x = dat$term) if (!sjmisc::is_empty(simplex.terms)) { if (!obj_has_name(dat, "wrap.facet")) { dat$wrap.facet <- "" dat$wrap.facet[simplex.terms] <- "Simplex Parameters" } else { dat$wrap.facet[simplex.terms] <- sprintf( "%s (Simplex Parameters)", dat$wrap.facet[simplex.terms] ) } } # remove facet column if not necessary if (!show.zeroinf && obj_has_name(dat, "wrap.facet")) dat <- dplyr::select(dat, -.data$wrap.facet) dat } .get_confint <- function(ci.lvl = .95) { if (!is.null(ci.lvl) && !is.na(ci.lvl)) (1 + ci.lvl) / 2 else .975 } sjPlot/R/plot_type_int.R0000644000176200001440000001465013713227102014746 0ustar liggesusers#' @importFrom insight get_data find_interactions #' @importFrom stats formula sd quantile #' @importFrom purrr map map_lgl map_chr #' @importFrom sjmisc trim is_empty str_contains is_float #' @importFrom dplyr select n_distinct #' @importFrom ggeffects ggpredict #' @importFrom graphics plot plot_type_int <- function(model, mdrt.values, ci.lvl, pred.type, facets, show.data, jitter, geom.colors, axis.title, title, legend.title, axis.lim, case, show.legend, dot.size, line.size, ...) { # interaction terms are separated with ":" int.terms <- insight::find_interactions(model, component = "conditional", flatten = TRUE) # stop if no interaction found if (is.null(int.terms)) stop("No interaction term found in model.", call. = F) # get interaction terms and model frame ia.terms <- purrr::map(int.terms, ~ sjmisc::trim(unlist(strsplit(.x, "[\\*:]")))) mf <- insight::get_data(model) pl <- list() # intertate interaction terms for (i in 1:length(ia.terms)) { ia <- ia.terms[[i]] find.fac <- purrr::map_lgl(ia, ~ is_categorical(mf[[.x]])) # find all non-categorical variables, except first # term, which is considered as being along the x-axis check_cont <- ia[-1][!find.fac[2:length(find.fac)]] # if we have just categorical as interaction terms, # we plot all category values if (!sjmisc::is_empty(check_cont)) { # get data from continuous interaction terms. we # need this to compute the specific values that # should be used as group characteristic for the plot cont_terms <- dplyr::select(mf, !! 
check_cont) # for quartiles used as moderator values, make sure # that the variable's range is large enough to compute # quartiles mdrt.val <- mv_check(mdrt.values = mdrt.values, cont_terms) # prepare terms for ggpredict()-call. terms is a character-vector # with term name and values to plot in square brackets terms <- purrr::map_chr(check_cont, function(x) { if (mdrt.val == "minmax") { ct.min <- min(cont_terms[[x]], na.rm = TRUE) ct.max <- max(cont_terms[[x]], na.rm = TRUE) if (sjmisc::is_float(ct.min) || sjmisc::is_float(ct.max)) sprintf("%s [%.2f,%.2f]", x, ct.min, ct.max) else sprintf("%s [%i,%i]", x, ct.min, ct.max) } else if (mdrt.val == "meansd") { mw <- mean(cont_terms[[x]], na.rm = TRUE) sabw <- stats::sd(cont_terms[[x]], na.rm = TRUE) sprintf("%s [%.2f,%.2f,%.2f]", x, mw, mw - sabw, mw + sabw) } else if (mdrt.val == "zeromax") { ct.max <- max(cont_terms[[x]], na.rm = TRUE) if (sjmisc::is_float(ct.max)) sprintf("%s [0,%.2f]", x, ct.max) else sprintf("%s [0,%i]", x, ct.max) } else if (mdrt.val == "quart") { qu <- as.vector(stats::quantile(cont_terms[[x]], na.rm = T)) sprintf("%s [%.2f,%.2f,%.2f]", x, qu[3], qu[2], qu[4]) } else { x } }) ia[match(check_cont, ia)] <- terms } # compute marginal effects for interaction terms dat <- ggeffects::ggpredict( model = model, terms = ia, ci.lvl = ci.lvl, type = pred.type, full.data = FALSE, ... ) # evaluate dots-arguments alpha <- .15 dodge <- .1 dot.alpha <- .5 log.y <- FALSE add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("alpha" %in% names(add.args)) alpha <- eval(add.args[["alpha"]]) if ("dodge" %in% names(add.args)) dodge <- eval(add.args[["dodge"]]) if ("dot.alpha" %in% names(add.args)) dot.alpha <- eval(add.args[["dot.alpha"]]) if ("log.y" %in% names(add.args)) log.y <- eval(add.args[["log.y"]]) # select color palette if (is.null(geom.colors) || geom.colors[1] != "bw") geom.colors <- col_check2(geom.colors, dplyr::n_distinct(dat$group)) # save plot of marginal effects for interaction terms p <- graphics::plot( dat, ci = !is.na(ci.lvl), facets = facets, rawdata = show.data, colors = geom.colors, jitter = jitter, use.theme = FALSE, case = case, show.legend = show.legend, dot.alpha = dot.alpha, alpha = alpha, dodge = dodge, log.y = log.y, dot.size = dot.size, line.size = line.size ) # set axis and plot titles if (!is.null(axis.title)) { if (length(axis.title) > 1) { p <- p + labs(x = axis.title[1], y = axis.title[2]) } else { p <- p + labs(y = axis.title) } } # set axis and plot titles if (!is.null(title)) p <- p + ggtitle(title) # set axis and plot titles if (!is.null(legend.title)) p <- p + labs(colour = legend.title) # set axis limits if (!is.null(axis.lim)) { if (is.list(axis.lim)) p <- p + xlim(axis.lim[[1]]) + ylim(axis.lim[[2]]) else p <- p + ylim(axis.lim) } # add plot result to final return value if (length(ia.terms) == 1) pl <- p else pl[[length(pl) + 1]] <- p } pl } #' @importFrom stats na.omit is_categorical <- function(x) { is.factor(x) || (length(unique(stats::na.omit(x))) < 3) } #' @importFrom stats quantile #' @importFrom purrr map_dbl mv_check <- function(mdrt.values, x) { # for quartiles used as moderator values, make sure # that the variable's range is large enough to compute # quartiles if (mdrt.values == "quart") { if (!is.data.frame(x)) x <- as.data.frame(x) mvc <- purrr::map_dbl(x, ~ length(unique(as.vector(stats::quantile(.x, na.rm = T))))) if (any(mvc < 3)) { # tell user that quart won't work message("Could not compute quartiles, too small range of moderator variable. 
Defaulting `mdrt.values` to `minmax`.") mdrt.values <- "minmax" } } mdrt.values } sjPlot/R/plot_point_estimates.R0000644000176200001440000001622613617314260016330 0ustar liggesusers#' @importFrom dplyr n_distinct if_else #' @importFrom sjmisc is_empty #' @importFrom sjlabelled as_numeric #' @importFrom insight find_response plot_point_estimates <- function(model, dat, tf, title, axis.labels, axis.title, axis.lim, grid.breaks, show.values, value.offset, geom.size, line.size, geom.colors, bpe.style, bpe.color, vline.color, value.size, facets, ci.style, ...) { # some defaults... size.inner <- .07 spacing <- .4 width <- if (is.stan(model)) .06 else 0 # check additional arguments, for stan-geoms add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("size.inner" %in% names(add.args)) size.inner <- eval(add.args[["size.inner"]]) if ("width" %in% names(add.args)) width <- eval(add.args[["width"]]) if ("spacing" %in% names(add.args)) spacing <- eval(add.args[["spacing"]]) # need some additional data, for stan-geoms dat$xpos <- sjlabelled::as_numeric(dat$term, start.at = 1) dat$xmin <- dat$xpos - (geom.size * size.inner) dat$xmax <- dat$xpos + (geom.size * size.inner) # set default for empty titles/labels if (sjmisc::is_empty(title)) title <- NULL if (sjmisc::is_empty(axis.labels)) axis.labels <- attributes(dat)$pretty_names if (sjmisc::is_empty(axis.title)) axis.title <- NULL # if we have non-estimable coefficients (i.e. missings) # remove them here no_coefficient <- which(is.na(dat$estimate)) if (length(no_coefficient) > 0) { dat <- dat[-no_coefficient, ] } # axis limits and tick breaks for y-axis axis.scaling <- axis_limits_and_ticks( axis.lim = axis.lim, min.val = min(dat$conf.low), max.val = max(dat$conf.high), grid.breaks = grid.breaks, exponentiate = isTRUE(tf == "exp"), min.est = min(dat$estimate), max.est = max(dat$estimate) ) # based on current ggplot theme, highlights vertical default line yintercept = dplyr::if_else(isTRUE(tf == "exp"), 1, 0) layer_vertical_line <- geom_intercept_line(yintercept, axis.scaling, vline.color) # check whether we have a multinomial log. reg. 
model multinomial <- obj_has_name(dat, "response.level") # basis aes mapping if (multinomial) p <- ggplot(dat, aes_string(x = "term", y = "estimate", colour = "response.level", fill = "response.level")) else p <- ggplot(dat, aes_string(x = "term", y = "estimate", colour = "group", fill = "group")) if (is.stan(model)) { if (ci.style == "whisker") { hdi_alpha <- 1 dot.fac <- 1.2 } else { hdi_alpha <- .5 dot.fac <- 3 } # special setup for rstan-models p <- p + layer_vertical_line if (ci.style == "whisker") p <- p + geom_errorbar(aes_string(ymin = "conf.low", ymax = "conf.high"), size = line.size, width = width) else p <- p + geom_rect(aes_string(ymin = "conf.low", ymax = "conf.high", xmin = "xmin", xmax = "xmax"), alpha = hdi_alpha, colour = "white", size = .5) # only add inner region if requested if (size.inner > 0) { p <- p + geom_rect(aes_string(ymin = "conf.low50", ymax = "conf.high50", xmin = "xmin", xmax = "xmax"), alpha = hdi_alpha, colour = "white", size = .5) } # define style for Bayesian point estimate if (bpe.style == "line") { if (is.null(bpe.color)) { p <- p + geom_segment(aes_string(x = "xmin", xend = "xmax", y = "estimate", yend = "estimate"), size = geom.size * .9) } else { p <- p + geom_segment(aes_string(x = "xmin", xend = "xmax", y = "estimate", yend = "estimate"), colour = bpe.color, size = geom.size * .9) } } else if (is.null(bpe.color)) { p <- p + geom_point(aes_string(y = "estimate"), fill = "white", size = geom.size * dot.fac) } else { p <- p + geom_point(aes_string(y = "estimate"), fill = "white", colour = bpe.color, size = geom.size * dot.fac) } } else { # setup base plot p <- p + layer_vertical_line if (multinomial) { p <- p + geom_point(size = geom.size, position = position_dodge(width = spacing)) + geom_errorbar(aes_string(ymin = "conf.low", ymax = "conf.high"), position = position_dodge(width = spacing), width = width, size = line.size) } else { p <- p + geom_point(size = geom.size) + geom_errorbar(aes_string(ymin = "conf.low", ymax = "conf.high"), width = width, size = line.size) } } # set up base aes, either with or w/o groups p <- p + coord_flip() if (multinomial) { col.len <- dplyr::n_distinct(dat$response.level) # remove legend p <- p + guides(fill = "none") } else { col.len <- dplyr::n_distinct(dat$group) # remove legend p <- p + guides(colour = "none", fill = "none") } # add value labels if (show.values) p <- p + geom_text( aes_string(label = "p.label"), nudge_x = value.offset, show.legend = FALSE, size = value.size ) # set axis labels if (!is.null(axis.labels)) p <- p + scale_x_discrete(labels = axis.labels) # we need transformed scale for exponentiated estimates has_zeroinf <- (obj_has_name(dat, "wrap.facet") && dplyr::n_distinct(dat$wrap.facet, na.rm = TRUE) > 1) if (isTRUE(tf == "exp")) { if (has_zeroinf) { p <- p + scale_y_continuous(trans = "log10") } else { p <- p + scale_y_continuous( trans = "log10", limits = axis.scaling$axis.lim, breaks = axis.scaling$ticks, labels = prettyNum ) } } else { if (has_zeroinf) { } else { p <- p + scale_y_continuous( limits = axis.scaling$axis.lim, breaks = axis.scaling$ticks, labels = axis.scaling$ticks ) } } # set colors p <- p + scale_colour_manual(values = col_check2(geom.colors, col.len)) + scale_fill_manual(values = col_check2(geom.colors, col.len)) # facets? 
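  # Hedged sketch: for zero-inflated models the coefficients end up in
  # "Conditional Model" / "Zero-Inflated Model" panels, roughly equivalent to
  #   p + ggplot2::facet_wrap(~wrap.facet, ncol = 1, scales = "free")
  # which is what the second branch below adds; the first branch facets by the
  # `facet` column (e.g. random-effect groups) when it contains more than one value.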
if (obj_has_name(dat, "facet") && dplyr::n_distinct(dat$facet, na.rm = TRUE) > 1) p <- p + facet_grid(~facet) else if (has_zeroinf) p <- p + facet_wrap(~wrap.facet, ncol = 1, scales = "free") # set axis and plot titles if (length(axis.title) > 1) axis.title <- axis.title[1] p <- p + labs( x = NULL, y = axis.title, title = title ) # for multinomial models, set response variable name as name for legend if (multinomial) p <- p + labs(colour = insight::find_response(model)) p } sjPlot/R/plot_model.R0000644000176200001440000012055514073077247014232 0ustar liggesusers#' @title Plot regression models #' @name plot_model #' #' @description #' \code{plot_model()} creates plots from regression models, either #' estimates (as so-called forest or dot whisker plots) or marginal effects. #' #' @param model A regression model object. Depending on the \code{type}, many #' kinds of models are supported, e.g. from packages like \pkg{stats}, #' \pkg{lme4}, \pkg{nlme}, \pkg{rstanarm}, \pkg{survey}, \pkg{glmmTMB}, #' \pkg{MASS}, \pkg{brms} etc. #' @param type Type of plot. There are three groups of plot-types: \cr \cr #' \emph{Coefficients} (\href{https://strengejacke.github.io/sjPlot/articles/plot_model_estimates.html}{related vignette}) #' \describe{ #' \item{\code{type = "est"}}{Forest-plot of estimates. If the fitted model #' only contains one predictor, slope-line is plotted.} #' \item{\code{type = "re"}}{For mixed effects models, plots the random #' effects.} #' \item{\code{type = "std"}}{Forest-plot of standardized coefficients.} #' \item{\code{type = "std2"}}{Forest-plot of standardized coefficients, #' however, standardization is done by dividing by two SD (see 'Details').} #' } #' \emph{Marginal Effects} (\href{https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html}{related vignette}) #' \describe{ #' \item{\code{type = "pred"}}{Predicted values (marginal effects) for #' specific model terms. See \code{\link[ggeffects]{ggpredict}} for details.} #' \item{\code{type = "eff"}}{Similar to \code{type = "pred"}, however, #' discrete predictors are held constant at their proportions (not reference #' level). See \code{\link[ggeffects]{ggeffect}} for details.} #' \item{\code{type = "emm"}}{Similar to \code{type = "eff"}, see #' \code{\link[ggeffects]{ggemmeans}} for details.} #' \item{\code{type = "int"}}{Marginal effects of interaction terms in #' \code{model}.} #' } #' \emph{Model diagnostics} #' \describe{ #' \item{\code{type = "slope"}}{Slope of coefficients for each single #' predictor, against the response (linear relationship between each model #' term and response). See 'Details'.} #' \item{\code{type = "resid"}}{Slope of coefficients for each single #' predictor, against the residuals (linear relationship between each model #' term and residuals). See 'Details'.} #' \item{\code{type = "diag"}}{Check model assumptions. See 'Details'.} #' } #' \strong{Note:} For mixed models, the diagnostic plots like linear relationship #' or check for Homoscedasticity, do \strong{not} take the uncertainty of #' random effects into account, but is only based on the fixed effects part #' of the model. #' @param transform A character vector, naming a function that will be applied #' on estimates and confidence intervals. By default, \code{transform} will #' automatically use \code{"exp"} as transformation for applicable classes of #' \code{model} (e.g. logistic or poisson regression). Estimates of linear #' models remain untransformed. 
Use \code{NULL} if you want the raw, #' non-transformed estimates. #' @param terms Character vector with the names of those terms from \code{model} #' that should be plotted. This argument depends on the plot-type: #' \describe{ #' \item{\emph{Coefficients}}{Select terms that should be plotted. All other #' term are removed from the output. Note that the term names must match #' the names of the model's coefficients. For factors, this means that #' the variable name is suffixed with the related factor level, and each #' category counts as one term. E.g. \code{rm.terms = "t_name [2,3]"} #' would remove the terms \code{"t_name2"} and \code{"t_name3"} (assuming #' that the variable \code{t_name} is categorical and has at least #' the factor levels \code{2} and \code{3}). Another example for the #' \emph{iris}-dataset: \code{terms = "Species"} would not work, instead #' you would write \code{terms = "Species [versicolor,virginica]"} to #' remove these two levels, or \code{terms = "Speciesversicolor"} if you #' just want to remove the level \emph{versicolor} from the plot.} #' \item{\emph{Marginal Effects}}{Here \code{terms} indicates for which #' terms marginal effects should be displayed. At least one term is #' required to calculate effects, maximum length is three terms, where #' the second and third term indicate the groups, i.e. predictions of #' first term are grouped by the levels of the second (and third) term. #' \code{terms} may also indicate higher order terms (e.g. interaction #' terms). Indicating levels in square brackets allows for selecting only #' specific groups. Term name and levels in brackets must be separated by #' a whitespace character, e.g. \code{terms = c("age", "education [1,3]")}. #' It is also possible to specify a range of numeric values for the #' predictions with a colon, for instance \code{terms = c("education [1,3]", #' "age [30:50]")}. Furthermore, it is possible to specify a function name. #' Values for predictions will then be transformed, e.g. #' \code{terms = "income [exp]"}. This is useful when model predictors were #' transformed for fitting the model and should be back-transformed to the #' original scale for predictions. Finally, numeric vectors for which no #' specific values are given, a "pretty range" is calculated, to avoid #' memory allocation problems for vectors with many unique values. If a #' numeric vector is specified as second or third term (i.e. if this vector #' represents a grouping structure), representative values (see #' \code{\link[ggeffects]{values_at}}) are chosen. If all values for a #' numeric vector should be used to compute predictions, you may use #' e.g. terms = "age [all]". For more details, see #' \code{\link[ggeffects]{ggpredict}}.} #' } #' @param sort.est Determines in which way estimates are sorted in the plot: #' \itemize{ #' \item If \code{NULL} (default), no sorting is done and estimates are sorted in the same order as they appear in the model formula. #' \item If \code{TRUE}, estimates are sorted in descending order, with highest estimate at the top. #' \item If \code{sort.est = "sort.all"}, estimates are re-sorted for each coefficient (only applies if \code{type = "re"} and \code{grid = FALSE}), i.e. the estimates of the random effects for each predictor are sorted and plotted to an own plot. #' \item If \code{type = "re"}, specify a predictor's / coefficient's name to sort estimates according to this random effect. 
#' } #' @param rm.terms Character vector with names that indicate which terms should #' be removed from the plot. Counterpart to \code{terms}. \code{rm.terms = #' "t_name"} would remove the term \emph{t_name}. Default is \code{NULL}, i.e. #' all terms are used. For factors, levels that should be removed from the plot #' need to be explicitly indicated in square brackets, and match the model's #' coefficient names, e.g. \code{rm.terms = "t_name [2,3]"} would remove the terms #' \code{"t_name2"} and \code{"t_name3"} (assuming that the variable \code{t_name} #' was categorical and has at least the factor levels \code{2} and \code{3}). #' Another example for the \emph{iris} dataset would be #' \code{rm.terms = "Species [versicolor,virginica]"}. Note that the #' \code{rm.terms}-argument does not apply to \emph{Marginal Effects} plots. #' @param group.terms Numeric vector with group indices, to group coefficients. #' Each group of coefficients gets its own color (see 'Examples'). #' @param order.terms Numeric vector, indicating in which order the coefficients #' should be plotted. See examples in #' \href{https://strengejacke.github.io/sjPlot/articles/plot_model_estimates.html}{this package-vignette}. #' @param pred.type Character, only applies for \emph{Marginal Effects} plots #' with mixed effects models. Indicates whether predicted values should be #' conditioned on random effects (\code{pred.type = "re"}) or fixed effects #' only (\code{pred.type = "fe"}, the default). For details, see documentation #' of the \code{type}-argument in \code{\link[ggeffects]{ggpredict}}. #' @param mdrt.values Indicates which values of the moderator variable should be #' used when plotting interaction terms (i.e. \code{type = "int"}). \describe{ #' \item{\code{"minmax"}}{(default) minimum and maximum values (lower and #' upper bounds) of the moderator are used to plot the interaction between #' independent variable and moderator(s).} \item{\code{"meansd"}}{uses the #' mean value of the moderator as well as one standard deviation below and #' above mean value to plot the effect of the moderator on the independent #' variable (following the convention suggested by Cohen and Cohen and #' popularized by Aiken and West (1991), i.e. using the mean, the value one #' standard deviation above, and the value one standard deviation below the #' mean as values of the moderator, see #' \href{https://www.theanalysisfactor.com/3-tips-interpreting-moderation/}{Grace-Martin #' K: 3 Tips to Make Interpreting Moderation Effects Easier}).} #' \item{\code{"zeromax"}}{is similar to the \code{"minmax"} option, however, #' \code{0} is always used as minimum value for the moderator. This may be #' useful for predictors that don't have an empirical zero-value, but absence #' of moderation should be simulated by using 0 as minimum.} #' \item{\code{"quart"}}{calculates and uses the quartiles (lower, median and #' upper) of the moderator value.} \item{\code{"all"}}{uses all values of the #' moderator variable.} } #' @param ri.nr Numeric vector. If \code{type = "re"} and fitted model has more #' than one random intercept, \code{ri.nr} indicates which random effects of #' which random intercept (or: which list elements of #' \code{\link[lme4]{ranef}}) will be plotted. Default is \code{NULL}, so all #' random effects will be plotted. #' @param title Character vector, used as plot title. By default, #' \code{\link[sjlabelled]{response_labels}} is called to retrieve the label of #' the dependent variable, which will be used as title. 
Use \code{title = ""} #' to remove title. #' @param axis.title Character vector of length one or two (depending on the #' plot function and type), used as title(s) for the x and y axis. If not #' specified, a default labelling is chosen. \strong{Note:} Some plot types #' may not support this argument sufficiently. In such cases, use the returned #' ggplot-object and add axis titles manually with #' \code{\link[ggplot2]{labs}}. Use \code{axis.title = ""} to remove axis #' titles. #' @param axis.labels Character vector with labels for the model terms, used as #' axis labels. By default, \code{\link[sjlabelled]{term_labels}} is #' called to retrieve the labels of the coefficients, which will be used as #' axis labels. Use \code{axis.labels = ""} or \code{auto.label = FALSE} to #' use the variable names as labels instead. If \code{axis.labels} is a named #' vector, axis labels (by default, the names of the model's coefficients) #' will be matched with the names of \code{axis.labels}. This ensures that #' labels always match the related axis value, no matter in which way #' axis labels are sorted. #' @param axis.lim Numeric vector of length 2, defining the range of the plot #' axis. Depending on plot-type, may affect either the x- or y-axis. For #' \emph{Marginal Effects} plots, \code{axis.lim} may also be a list of two #' vectors of length 2, defining axis limits for both the x and y axis. #' @param legend.title Character vector, used as legend title for plots that #' have a legend. #' @param grid.breaks Numeric value or vector; if \code{grid.breaks} is a #' single value, sets the distance between breaks for the axis at every #' \code{grid.breaks}'th position, where a major grid line is plotted. If #' \code{grid.breaks} is a vector, values will be used to define the #' axis positions of the major grid lines. #' @param ci.lvl Numeric, the level of the confidence intervals (error bars). #' Use \code{ci.lvl = NA} to remove error bars. For \code{stanreg}-models, #' \code{ci.lvl} defines the (outer) probability for the \emph{credible interval} #' that is plotted (see \code{\link[bayestestR]{ci}}). By #' default, \code{stanreg}-models are printed with two intervals: the "inner" #' interval, which defaults to the 50\%-CI; and the "outer" interval, which #' defaults to the 89\%-CI. \code{ci.lvl} affects only the outer interval in #' such cases. See \code{prob.inner} and \code{prob.outer} under the #' \code{...}-argument for more details. #' @param se Logical, if \code{TRUE}, the standard errors are #' also printed. If robust standard errors are required, use arguments #' \code{vcov.fun}, \code{vcov.type} and \code{vcov.args} (see #' \code{\link[parameters]{standard_error_robust}} and #' \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} #' for details), or use argument \code{robust} as shortcut. \code{se} overrides #' \code{ci.lvl}: if not \code{NULL}, arguments \code{ci.lvl} and \code{transform} #' will be ignored. Currently, \code{se} only applies to \emph{Coefficients} plots. #' @param show.intercept Logical, if \code{TRUE}, the intercept of the fitted #' model is also plotted. Default is \code{FALSE}. If \code{transform = #' "exp"}, please note that due to exponential transformation of estimates, #' the intercept in some cases is non-finite and the plot cannot be created. #' @param show.values Logical, whether values should be plotted or not. 
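#'   For instance, \code{plot_model(m, show.values = TRUE, value.offset = .3)} would print the estimates next to the dots (illustrative call; the offset value is arbitrary).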
#' @param show.p Logical, adds asterisks that indicate the significance level of #' estimates to the value labels. #' @param show.data Logical, for \emph{Marginal Effects} plots, also plots the #' raw data points. #' @param show.legend For \emph{Marginal Effects} plots, shows or hides the #' legend. #' @param show.zeroinf Logical, if \code{TRUE}, shows the zero-inflation part of #' hurdle- or zero-inflated models. #' @param robust Logical, shortcut for arguments \code{vcov.fun} and \code{vcov.type}. #' If \code{TRUE}, uses \code{vcov.fun = "vcovHC"} and \code{vcov.type = "HC3"} as #' default, that is, \code{\link[sandwich]{vcovHC}} with default-type is called #' (see \code{\link[parameters]{standard_error_robust}} and #' \href{https://easystats.github.io/parameters/articles/model_parameters_robust.html}{this vignette} #' for further details). #' @param vcov.fun Character vector, indicating the name of the \code{vcov*()}-function #' from the \pkg{sandwich} or \pkg{clubSandwich} package, e.g. \code{vcov.fun = "vcovCL"}, #' if robust standard errors are required. #' @param vcov.type Character vector, specifying the estimation type for the #' robust covariance matrix estimation (see \code{\link[sandwich:vcovHC]{vcovHC()}} #' or \code{clubSandwich::vcovCR()} for details). #' @param vcov.args List of named vectors, used as additional arguments that #' are passed down to \code{vcov.fun}. #' @param value.offset Numeric, offset for text labels to adjust their position #' relative to the dots or lines. #' @param dot.size Numeric, size of the dots that indicate the point estimates. #' @param line.size Numeric, size of the lines that indicate the error bars. #' @param colors May be a character vector of color values in hex-format, valid #' color value names (see \code{demo("colors")}) or a name of a pre-defined #' color palette. The following options are valid for the \code{colors} argument: #' \itemize{ #' \item If not specified, a default color brewer palette will be used, which is suitable for the plot style. #' \item If \code{"gs"}, a greyscale will be used. #' \item If \code{"bw"}, and plot-type is a line-plot, the plot is black/white and uses different line types to distinguish groups (see \href{https://strengejacke.github.io/sjPlot/articles/blackwhitefigures.html}{this package-vignette}). #' \item If \code{colors} is any valid color brewer palette name, the related palette will be used. Use \code{RColorBrewer::display.brewer.all()} to view all available palette names. #' \item There are some pre-defined color palettes in this package, see \code{\link{sjPlot-themes}} for details. #' \item Else specify own color values or names as vector (e.g. \code{colors = "#00ff00"} or \code{colors = c("firebrick", "blue")}). #' } #' @param grid Logical, if \code{TRUE}, multiple plots are plotted as grid #' layout. #' @param p.threshold Numeric vector of length 3, indicating the threshold for #' annotating p-values with asterisks. Only applies if #' \code{p.style = "asterisk"}. #' @param wrap.title Numeric, determines how many chars of the plot title are #' displayed in one line and when a line break is inserted. #' @param wrap.labels Numeric, determines how many chars of the value, variable #' or axis labels are displayed in one line and when a line break is inserted. #' @param case Desired target case. Labels will automatically be converted into the #' specified character case. See \code{snakecase::to_any_case()} for more #' details on this argument. 
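#'   For instance, \code{case = "snake"} would turn a label such as "Hours of Care" into "hours_of_care" (an illustrative example of the conversion, not package output).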
By default, if \code{case} is not specified, #' it will be set to \code{"parsed"}, unless \code{prefix.labels} is not #' \code{"none"}. If \code{prefix.labels} is either \code{"label"} (or #' \code{"l"}) or \code{"varname"} (or \code{"v"}) and \code{case} is not #' specified, it will be set to \code{NULL} - this is a more convenient #' default when prefixing labels. #' @param auto.label Logical, if \code{TRUE} (the default), #' and \href{https://strengejacke.github.io/sjlabelled/articles/intro_sjlabelled.html}{data is labelled}, #' \code{\link[sjlabelled]{term_labels}} is called to retrieve the labels #' of the coefficients, which will be used as predictor labels. If data is #' not labelled, \href{https://easystats.github.io/parameters/reference/format_parameters.html}{format_parameters()} #' is used to create pretty labels. If \code{auto.label = FALSE}, #' original variable names and value labels (factor levels) are used. #' @param prefix.labels Indicates whether the value labels of categorical variables #' should be prefixed, e.g. with the variable name or variable label. See #' argument \code{prefix} in \code{\link[sjlabelled]{term_labels}} for #' details. #' @param jitter Numeric, between 0 and 1. If \code{show.data = TRUE}, you can #' add a small amount of random variation to the location of each data point. #' \code{jitter} then indicates the width, i.e. how much of a bin's width #' will be occupied by the jittered values. #' @param digits Numeric, number of digits after decimal point when rounding #' estimates or values. #' @param p.adjust Character vector, if not \code{NULL}, indicates the method #' to adjust p-values. See \code{\link[stats]{p.adjust}} for details. #' @param value.size Numeric, indicates the size of value labels. Can be used #' for all plot types where the argument \code{show.values} is applicable, #' e.g. \code{value.size = 4}. #' @param vline.color Color of the vertical "zero effect" line. Default color is #' inherited from the current theme. #' @param bpe For \strong{Stan}-models (fitted with the \pkg{rstanarm}- or #' \pkg{brms}-package), the Bayesian point estimate is, by default, the median #' of the posterior distribution. Use \code{bpe} to define other functions to #' calculate the Bayesian point estimate. \code{bpe} needs to be a character #' naming the specific function, which is passed to the \code{fun}-argument in #' \code{\link[sjmisc]{typical_value}}. So, \code{bpe = "mean"} would #' calculate the mean value of the posterior distribution. #' @param bpe.style For \strong{Stan}-models (fitted with the \pkg{rstanarm}- or #' \pkg{brms}-package), the Bayesian point estimate is indicated as a small, #' vertical line by default. Use \code{bpe.style = "dot"} to plot a dot #' instead of a line for the point estimate. #' @param bpe.color Character vector, indicating the color of the Bayesian #' point estimate. Setting \code{bpe.color = NULL} will inherit the color #' from the mapped aesthetic to match it with the geom's color. #' @param ci.style Character vector, defining whether inner and outer intervals #' for Bayesian models are shown in boxplot-style (\code{"whisker"}) or in #' bars with different alpha-levels (\code{"bar"}). #' @param ... Other arguments, passed down to various functions. Here is a list #' of supported arguments and their description in detail. 
#' \describe{ #' \item{\code{prob.inner} and \code{prob.outer}}{For \strong{Stan}-models #' (fitted with the \pkg{rstanarm}- or \pkg{brms}-package) and coefficients #' plot-types, you can specify numeric values between 0 and 1 for #' \code{prob.inner} and \code{prob.outer}, which will then be used as inner #' and outer probabilities for the uncertainty intervals (HDI). By default, #' the inner probability is 0.5 and the outer probability is 0.89 (unless #' \code{ci.lvl} is specified - in this case, \code{ci.lvl} is used as outer #' probability). #' } #' \item{\code{size.inner}}{For \strong{Stan}-models and \emph{Coefficients} #' plot-types, you can specify the width of the bar for the inner #' probabilities. Default is \code{0.1}. Setting \code{size.inner = 0} #' removes the inner probability regions. #' } #' \item{\code{width}, \code{alpha}, and \code{scale}}{Passed #' down to \code{geom_errorbar()} or \code{geom_density_ridges()}, for #' forest or diagnostic plots. #' } #' \item{\code{width}, \code{alpha}, \code{dot.alpha}, \code{dodge} and \code{log.y}}{Passed #' down to \code{\link[ggeffects]{plot.ggeffects}} for \emph{Marginal Effects} #' plots. #' } #' \item{\code{show.loess}}{Logical, for diagnostic plot-types \code{"slope"} #' and \code{"resid"}, adds (or hides) a loess-smoothed line to the plot. #' } #' \item{\emph{Marginal Effects} plot-types}{When plotting marginal effects, #' arguments are also passed down to \code{\link[ggeffects]{ggpredict}}, #' \code{\link[ggeffects]{ggeffect}} or \code{\link[ggeffects]{plot.ggeffects}}. #' } #' \item{Case conversion of labels}{For case conversion of labels (see argument #' \code{case}), arguments \code{sep_in} and \code{sep_out} will be passed #' down to \code{snakecase::to_any_case()}. This only #' applies to automatically retrieved term labels, \emph{not} if #' term labels are provided by the \code{axis.labels}-argument. #' } #' } #' #' @return #' Depending on the plot-type, \code{plot_model()} returns a #' \code{ggplot}-object or a list of such objects. \code{get_model_data} #' returns the associated data with the plot-object as tidy data frame, or #' (depending on the plot-type) a list of such data frames. #' #' @details #' \subsection{Different Plot Types}{ #' \describe{ #' \item{\code{type = "std"}}{Plots standardized estimates. See details below.} #' \item{\code{type = "std2"}}{Plots standardized estimates, however, #' standardization follows Gelman's (2008) suggestion, rescaling the #' estimates by dividing them by two standard deviations instead of just one. #' Resulting coefficients are then directly comparable for untransformed #' binary predictors. #' } #' \item{\code{type = "pred"}}{Plots estimated marginal means (or marginal effects). #' Simply wraps \code{\link[ggeffects]{ggpredict}}. See also #' \href{https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html}{this package-vignette}. #' } #' \item{\code{type = "eff"}}{Plots estimated marginal means (or marginal effects). #' Simply wraps \code{\link[ggeffects]{ggeffect}}. See also #' \href{https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html}{this package-vignette}. #' } #' \item{\code{type = "int"}}{A shortcut for marginal effects plots, where #' interaction terms are automatically detected and used as #' \code{terms}-argument. 
Furthermore, if the moderator variable (the second #' - and third - term in an interaction) is continuous, \code{type = "int"} #' automatically chooses useful values based on the \code{mdrt.values}-argument, #' which are passed to \code{terms}. Then, \code{\link[ggeffects]{ggpredict}} #' is called. \code{type = "int"} plots the interaction term that appears #' first in the formula along the x-axis, while the second (and possibly #' third) variable in an interaction is used as grouping factor(s) #' (moderating variable). Use \code{type = "pred"} or \code{type = "eff"} #' and specify a certain order in the \code{terms}-argument to indicate #' which variable(s) should be used as moderator. See also #' \href{https://strengejacke.github.io/sjPlot/articles/plot_interactions.html}{this package-vignette}. #' } #' \item{\code{type = "slope"} and \code{type = "resid"}}{Simple diagnostic-plots, #' where a linear model for each single predictor is plotted against the #' response variable, or the model's residuals. Additionally, a loess-smoothed #' line is added to the plot. The main purpose of these plots is to check whether #' the relationship between outcome (or residuals) and a predictor is roughly #' linear or not. Since the plots are based on a simple linear regression with #' only one model predictor at the moment, the slopes (i.e. coefficients) may #' differ from the coefficients of the complete model. #' } #' \item{\code{type = "diag"}}{For \strong{Stan-models}, plots the prior versus #' posterior samples. For \strong{linear (mixed) models}, plots for #' multicollinearity-check (Variance Inflation Factors), QQ-plots, #' checks for normal distribution of residuals and homoscedasticity #' (constant variance of residuals) are shown. For \strong{generalized #' linear mixed models}, returns the QQ-plot for random effects. #' } #' } #' } #' \subsection{Standardized Estimates}{ #' Default standardization is done by completely refitting the model on the #' standardized data. Hence, this approach is equal to standardizing the #' variables before fitting the model, which is particularly recommended for #' complex models that include interactions or transformations (e.g., polynomial #' or spline terms). When \code{type = "std2"}, standardization of estimates #' follows \href{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf}{Gelman's (2008)} #' suggestion, rescaling the estimates by dividing them by two standard deviations #' instead of just one. Resulting coefficients are then directly comparable for #' untransformed binary predictors. #' } #' #' @references #' Gelman A (2008) "Scaling regression inputs by dividing by two #' standard deviations." \emph{Statistics in Medicine 27: 2865-2873.} #' \url{http://www.stat.columbia.edu/~gelman/research/published/standardizing7.pdf} #' \cr \cr #' Aiken and West (1991). Multiple Regression: Testing and Interpreting Interactions. 
#' #' @examples #' # prepare data #' library(sjmisc) #' data(efc) #' efc <- to_factor(efc, c161sex, e42dep, c172code) #' m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc) #' #' # simple forest plot #' plot_model(m) #' #' # grouped coefficients #' plot_model(m, group.terms = c(1, 2, 3, 3, 3, 4, 4)) #' #' # keep only selected terms in the model: pos_v_4, the #' # levels 3 and 4 of factor e42dep and levels 2 and 3 for c172code #' plot_model(m, terms = c("pos_v_4", "e42dep [3,4]", "c172code [2,3]")) #' #' # multiple plots, as returned from "diagnostic"-plot type, #' # can be arranged with 'plot_grid()' #' \dontrun{ #' p <- plot_model(m, type = "diag") #' plot_grid(p)} #' #' # plot random effects #' if (require("lme4") && require("glmmTMB")) { #' m <- lmer(Reaction ~ Days + (Days | Subject), sleepstudy) #' plot_model(m, type = "re") #' #' # plot marginal effects #' plot_model(m, type = "pred", terms = "Days") #' } #' # plot interactions #' \dontrun{ #' m <- glm( #' tot_sc_e ~ c161sex + c172code * neg_c_7, #' data = efc, #' family = poisson() #' ) #' # type = "int" automatically selects groups for continuous moderator #' # variables - see argument 'mdrt.values'. The following function call is #' # identical to: #' # plot_model(m, type = "pred", terms = c("c172code", "neg_c_7 [7,28]")) #' plot_model(m, type = "int") #' #' # switch moderator #' plot_model(m, type = "pred", terms = c("neg_c_7", "c172code")) #' # same as #' # ggeffects::ggpredict(m, terms = c("neg_c_7", "c172code"))} #' #' # plot Stan-model #' \dontrun{ #' if (require("rstanarm")) { #' data(mtcars) #' m <- stan_glm(mpg ~ wt + am + cyl + gear, data = mtcars, chains = 1) #' plot_model(m, bpe.style = "dot") #' }} #' #' @importFrom insight model_info find_predictors #' @importFrom sjmisc word_wrap str_contains #' @importFrom sjlabelled response_labels term_labels #' @importFrom dplyr if_else n_distinct #' @importFrom graphics plot #' @importFrom ggeffects ggpredict ggeffect #' @importFrom stats terms #' #' @export plot_model <- function(model, type = c("est", "re", "eff", "emm", "pred", "int", "std", "std2", "slope", "resid", "diag"), transform, terms = NULL, sort.est = NULL, rm.terms = NULL, group.terms = NULL, order.terms = NULL, pred.type = c("fe", "re"), mdrt.values = c("minmax", "meansd", "zeromax", "quart", "all"), ri.nr = NULL, title = NULL, axis.title = NULL, axis.labels = NULL, legend.title = NULL, wrap.title = 50, wrap.labels = 25, axis.lim = NULL, grid.breaks = NULL, ci.lvl = NULL, se = NULL, robust = FALSE, vcov.fun = NULL, vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5"), vcov.args = NULL, colors = "Set1", show.intercept = FALSE, show.values = FALSE, show.p = TRUE, show.data = FALSE, show.legend = TRUE, show.zeroinf = TRUE, value.offset = NULL, value.size, jitter = NULL, digits = 2, dot.size = NULL, line.size = NULL, vline.color = NULL, p.threshold = c(0.05, 0.01, 0.001), p.adjust = NULL, grid, case, auto.label = TRUE, prefix.labels = c("none", "varname", "label"), bpe = "median", bpe.style = "line", bpe.color = "white", ci.style = c("whisker", "bar"), ... ) { type <- match.arg(type) pred.type <- match.arg(pred.type) mdrt.values <- match.arg(mdrt.values) prefix.labels <- match.arg(prefix.labels) vcov.type <- match.arg(vcov.type) ci.style <- match.arg(ci.style) # if we prefix labels, use different default for case conversion, # else the separating white spaces after colon are removed. 
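# (e.g. a prefixed label such as "Education: low" keeps its ": " separator here, because case conversion is then skipped via case = NULL; the label text is only an illustration)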
if (missing(case)) { if (prefix.labels == "none") case <- "parsed" else case <- NULL } if (isTRUE(robust)) { vcov.type <- "HC3" vcov.fun <- "vcovHC" } # check se-argument vcov.fun <- check_se_argument(se = vcov.fun, type = type) # get info on model family fam.info <- insight::model_info(model) if (insight::is_multivariate(model)) fam.info <- fam.info[[1]] # check whether estimates should be transformed or not if (missing(transform)) { if (fam.info$is_linear) transform <- NULL else transform <- "exp" } # get titles and labels for axis ---- # this is not appropriate when plotting random effects, # so retrieve labels only for other plot types if (type %in% c("est", "std", "std2") && isTRUE(auto.label)) { # get labels of dependent variables, and wrap them if too long if (is.null(title)) title <- sjlabelled::response_labels(model, case = case, mv = fam.info$is_multivariate, ...) title <- sjmisc::word_wrap(title, wrap = wrap.title) # labels for axis with term names if (is.null(axis.labels)) { term_labels <- sjlabelled::term_labels(model, case = case, prefix = prefix.labels, ...) if (.labelled_model_data(model) || is.stan(model)) axis.labels <- term_labels } axis.labels <- sjmisc::word_wrap(axis.labels, wrap = wrap.labels) # title for axis with estimate values if (is.null(axis.title)) axis.title <- sjmisc::word_wrap(estimate_axis_title(fit = model, axis.title = axis.title, type = type, transform = transform, include.zeroinf = TRUE), wrap = wrap.title) axis.title <- sjmisc::word_wrap(axis.title, wrap = wrap.labels) } # check nr of estimates. if only one, plot slope if (type == "est" && length(insight::find_predictors(model, component = "conditional", flatten = TRUE)) == 1 && length(insight::find_predictors(model, component = "instruments", flatten = TRUE)) == 0 && fam.info$is_linear && one_par(model)) type <- "slope" # set some default options for stan-models, which are not # available or appropriate for these if (is.stan(model)) { # no p-values show.p <- FALSE # no standardized coefficients if (type %in% c("std", "std2", "slope")) type <- "est" } # set defaults for arguments, depending on model ---- if (is.null(ci.lvl)) ci.lvl <- dplyr::if_else(is.stan(model), .89, .95) if (is.null(dot.size)) dot.size <- dplyr::if_else(is.stan(model), 1, 2.5) if (is.null(line.size)) line.size <- dplyr::if_else(is.stan(model), .7, .7) if (is.null(value.offset)) value.offset <- dplyr::if_else(is.stan(model), .25, .15) # check if plot-type is applicable if (type == "slope" && !fam.info$is_linear) { type <- "est" message("Plot-type \"slope\" only available for linear models. Using `type = \"est\"` now.") } if (type %in% c("est", "std", "std2") || (is.stan(model) && type == "re")) { # plot estimates ---- p <- plot_type_est( type = type, ci.lvl = ci.lvl, se = se, tf = transform, model = model, terms = terms, group.terms = group.terms, rm.terms = rm.terms, sort.est = sort.est, title = title, axis.title = axis.title, axis.labels = axis.labels, axis.lim = axis.lim, grid.breaks = grid.breaks, show.intercept = show.intercept, show.values = show.values, show.p = show.p, value.offset = value.offset, digits = digits, geom.colors = colors, geom.size = dot.size, line.size = line.size, order.terms = order.terms, vline.color = vline.color, value.size = value.size, bpe = bpe, bpe.style = bpe.style, bpe.color = bpe.color, facets = grid, show.zeroinf = show.zeroinf, p.threshold = p.threshold, vcov.fun = vcov.fun, vcov.type = vcov.type, vcov.args = vcov.args, ci.style = ci.style, p_adjust = p.adjust, ... 
) } else if (type == "re") { # plot random effects ---- p <- plot_type_ranef( model = model, ri.nr = ri.nr, ci.lvl = ci.lvl, se = se, tf = transform, sort.est = sort.est, title = title, axis.labels = axis.labels, axis.lim = axis.lim, grid.breaks = grid.breaks, show.values = show.values, value.offset = value.offset, digits = digits, facets = grid, geom.colors = colors, geom.size = dot.size, line.size = line.size, vline.color = vline.color, value.size = value.size, bpe.color = bpe.color, ci.style = ci.style, ... ) } else if (type %in% c("pred", "eff", "emm")) { # plot marginal effects ---- p <- plot_type_eff( type = type, model = model, terms = terms, ci.lvl = ci.lvl, pred.type = pred.type, facets = grid, show.data = show.data, jitter = jitter, geom.colors = colors, axis.title = axis.title, title = title, legend.title = legend.title, axis.lim = axis.lim, case = case, show.legend = show.legend, dot.size = dot.size, line.size = line.size, ... ) } else if (type == "int") { # plot interaction terms ---- p <- plot_type_int( model = model, mdrt.values = mdrt.values, ci.lvl = ci.lvl, pred.type = pred.type, facets = grid, show.data = show.data, jitter = jitter, geom.colors = colors, axis.title = axis.title, title = title, legend.title = legend.title, axis.lim = axis.lim, case = case, show.legend = show.legend, dot.size = dot.size, line.size = line.size, ... ) } else if (type %in% c("slope", "resid")) { # plot slopes of estimates ---- p <- plot_type_slope( model = model, terms = terms, rm.terms = rm.terms, ci.lvl = ci.lvl, colors = colors, title = title, show.data = show.data, jitter = jitter, facets = grid, axis.title = axis.title, case = case, useResiduals = type == "resid", ... ) } else if (type == "diag") { # plot diagnostic plots ---- if (is.stan(model)) { p <- plot_diag_stan( model = model, geom.colors = colors, axis.lim = axis.lim, facets = grid, axis.labels = axis.labels, ... ) } else if (fam.info$is_linear) { p <- plot_diag_linear( model = model, geom.colors = colors, dot.size = dot.size, line.size = line.size, ... ) } else { p <- plot_diag_glm( model = model, geom.colors = colors, dot.size = dot.size, line.size = line.size, ... ) } } p } #' @importFrom purrr map #' @rdname plot_model #' @export get_model_data <- function(model, type = c("est", "re", "eff", "pred", "int", "std", "std2", "slope", "resid", "diag"), transform, terms = NULL, sort.est = NULL, rm.terms = NULL, group.terms = NULL, order.terms = NULL, pred.type = c("fe", "re"), ri.nr = NULL, ci.lvl = NULL, colors = "Set1", grid, case = "parsed", digits = 2, ...) { p <- plot_model( model = model, type = type, transform = transform, terms = terms, sort.est = sort.est, rm.terms = rm.terms, group.terms = group.terms, order.terms = order.terms, pred.type = pred.type, ri.nr = ri.nr, ci.lvl = ci.lvl, colors = colors, grid = grid, case = case, digits = digits, auto.label = FALSE, ... ) if (inherits(p, "list")) purrr::map(p, ~ .x$data) else p$data } #' @importFrom insight has_intercept one_par <- function(model) { tryCatch( { length(stats::coef(model)) < 2 & !insight::has_intercept(model) }, error = function(x) { FALSE } ) } sjPlot/R/plot_models.R0000644000176200001440000002660114150124577014405 0ustar liggesusers#' @title Forest plot of multiple regression models #' @name plot_models #' #' @description Plot and compare regression coefficients with confidence #' intervals of multiple regression models in one plot. #' #' @param ... One or more regression models, including glm's or mixed models. #' May also be a \code{list} with fitted models. 
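#'   For instance, \code{plot_models(list(fit1, fit2, fit3))} is equivalent to passing the three models as separate arguments (illustrative call).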
See 'Examples'. #' @param std.est Choose whether standardized coefficients should be used #' for plotting. Default is no standardization (\code{std.est = NULL}). #' May be \code{"std"} for standardized beta values or \code{"std2"}, where #' standardization is done by rescaling estimates by dividing them by two sd. #' @param m.labels Character vector, used to indicate the different models #' in the plot's legend. If not specified, the labels of the dependent #' variables for each model are used. #' @param legend.pval.title Character vector, used as title of the plot legend that #' indicates the p-values. Default is \code{"p-level"}. Only applies if #' \code{p.shape = TRUE}. #' @param spacing Numeric, spacing between the dots and error bars of the #' plotted fitted models. Default is 0.3. #' @param p.shape Logical, if \code{TRUE}, significant levels are distinguished by #' different point shapes and a related legend is plotted. Default #' is \code{FALSE}. #' #' @inheritParams plot_model #' @inheritParams plot_grpfrq #' #' @return A ggplot-object. #' #' @examples #' data(efc) #' #' # fit three models #' fit1 <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) #' fit2 <- lm(neg_c_7 ~ c160age + c12hour + c161sex + c172code, data = efc) #' fit3 <- lm(tot_sc_e ~ c160age + c12hour + c161sex + c172code, data = efc) #' #' # plot multiple models #' plot_models(fit1, fit2, fit3, grid = TRUE) #' #' # plot multiple models with legend labels and #' # point shapes instead of value labels #' plot_models( #' fit1, fit2, fit3, #' axis.labels = c( #' "Carer's Age", "Hours of Care", "Carer's Sex", "Educational Status" #' ), #' m.labels = c("Barthel Index", "Negative Impact", "Services used"), #' show.values = FALSE, show.p = FALSE, p.shape = TRUE #' ) #' #' \dontrun{ #' # plot multiple models from nested lists argument #' all.models <- list() #' all.models[[1]] <- fit1 #' all.models[[2]] <- fit2 #' all.models[[3]] <- fit3 #' #' plot_models(all.models) #' #' # plot multiple models with different predictors (stepwise inclusion), #' # standardized estimates #' fit1 <- lm(mpg ~ wt + cyl + disp + gear, data = mtcars) #' fit2 <- update(fit1, . ~ . + hp) #' fit3 <- update(fit2, . ~ . + am) #' #' plot_models(fit1, fit2, fit3, std.est = "std2") #' } #' @import ggplot2 #' @importFrom purrr map map_df map2 #' @importFrom dplyr slice bind_rows filter #' @importFrom sjlabelled response_labels term_labels #' @importFrom rlang .data #' @importFrom sjmisc word_wrap var_rename add_variables #' @export plot_models <- function(..., transform = NULL, std.est = NULL, rm.terms = NULL, title = NULL, m.labels = NULL, legend.title = "Dependent Variables", legend.pval.title = "p-level", axis.labels = NULL, axis.title = NULL, axis.lim = NULL, wrap.title = 50, wrap.labels = 25, wrap.legend.title = 20, grid.breaks = NULL, dot.size = 3, line.size = NULL, value.size = NULL, spacing = 0.4, colors = "Set1", show.values = FALSE, show.legend = TRUE, show.intercept = FALSE, show.p = TRUE, p.shape = FALSE, p.threshold = c(0.05, 0.01, 0.001), p.adjust = NULL, ci.lvl = .95, robust = FALSE, vcov.fun = NULL, vcov.type = c("HC3", "const", "HC", "HC0", "HC1", "HC2", "HC4", "HC4m", "HC5"), vcov.args = NULL, vline.color = NULL, digits = 2, grid = FALSE, auto.label = TRUE, prefix.labels = c("none", "varname", "label")) { # retrieve list of fitted models input_list <- list(...) 
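# name the list elements after the expressions the models were passed as (e.g. "fit1", "fit2"), by deparsing the matched call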
names(input_list) <- unlist(lapply(match.call(expand.dots = F)$`...`, deparse)) vcov.type <- match.arg(vcov.type) if (isTRUE(robust)) { vcov.type <- "HC3" vcov.fun <- "vcovHC" } # check se-argument vcov.fun <- check_se_argument(se = vcov.fun, type = "est") if (missing(line.size) || is.null(line.size)) line.size <- .7 if (missing(value.size) || is.null(value.size)) value.size <- 4 # check length. if we have a list of fitted model, we need to "unlist" them if (length(input_list) == 1 && class(input_list[[1]]) == "list") input_list <- purrr::map(input_list[[1]], ~ .x) # get info on model family fam.info <- insight::model_info(input_list[[1]]) if (insight::is_multivariate(input_list[[1]])) fam.info <- fam.info[[1]] # check whether estimates should be transformed or not if (missing(transform)) { if (fam.info$is_linear) tf <- NULL else tf <- "exp" } else tf <- transform # check for standardization, only applies to linear models if (!any(inherits(input_list[[1]], c("lm", "lmerMod", "lme"), which = TRUE) == 1)) std.est <- NULL if (!is.null(std.est)) { std_method <- switch(std.est, "std" = "refit", "std2" = "2sd", "refit") } else { std_method <- FALSE } # if not standardized, we can get simple tidy output and # need to check whether intercept should be removed or not fl <- purrr::map( input_list, ~ tidy_model( model = .x, ci.lvl = ci.lvl, tf = transform, type = "est", bpe = "median", robust = list(vcov.fun = vcov.fun, vcov.type = vcov.type, vcov.args = vcov.args), facets = TRUE, show.zeroinf = FALSE, p.val = "wald", standardize = std_method, bootstrap = FALSE, iterations = 1000, seed = NULL, p_adjust = p.adjust ) ) # remove intercept from output if (!show.intercept) { fl <- purrr::map(fl, function(x) { rm.i <- string_ends_with("(Intercept)", x = x$term) if (length(rm.i)) { dplyr::slice(x, !! -rm.i) } else { x } }) } # exponentiation if (!is.null(tf)) { funtrans <- match.fun(tf) fl <- purrr::map(fl, function(x) { x[["estimate"]] <- funtrans(x[["estimate"]]) x[["conf.low"]] <- funtrans(x[["conf.low"]]) x[["conf.high"]] <- funtrans(x[["conf.high"]]) x }) } # add grouping index for (i in 1:length(fl)) fl[[i]] <- sjmisc::add_variables(fl[[i]], group = as.character(i), .after = Inf) # merge models to one data frame ff <- dplyr::bind_rows(fl) # remove further estimates rm.terms <- parse_terms(rm.terms) rems <- !(ff$term %in% rm.terms) if (!is.null(rm.terms)) ff <- dplyr::filter(ff, !! rems) # get labels of dependent variables, and wrap them if too long if (is.null(m.labels)) m.labels <- sjlabelled::response_labels(input_list) m.labels <- sjmisc::word_wrap(m.labels, wrap = wrap.labels) # make sure we have distinct labels, because we use them as # factor levels. 
else, duplicated factor levels will be dropped, # leading to missing groups in plot output if (anyDuplicated(m.labels) > 0) m.labels <- suppressMessages(tidy_label(m.labels)) ff$group <- as.factor(ff$group) levels(ff$group) <- m.labels # reverse group, to plot correct order from top to bottom ff$group <- factor(ff$group, levels = rev(unique(ff$group))) # add p-asterisks to data ff$p.stars <- get_p_stars(ff$p.value, p.threshold) ff$p.label <- sprintf("%.*f", digits, ff$estimate) if (show.p) ff$p.label <- sprintf("%s %s", ff$p.label, ff$p.stars) # axis limits and tick breaks for y-axis axis.scaling <- axis_limits_and_ticks( axis.lim = axis.lim, min.val = min(ff$conf.low), max.val = max(ff$conf.high), grid.breaks = grid.breaks, exponentiate = isTRUE(tf == "exp"), min.est = min(ff$estimate), max.est = max(ff$estimate) ) # based on current ggplot theme, highlights vertical default line yintercept <- if (isTRUE(tf == "exp")) 1 else 0 layer_vertical_line <- geom_intercept_line(yintercept, axis.scaling, vline.color) # reorder terms ff$term <- factor(ff$term, levels = rev(unique(ff$term))) # ensure correct legend labels ff$p.stars[ff$p.stars == ""] <- "n.s." ff$p.stars <- factor(ff$p.stars, levels = c("n.s.", "*", "**", "***")) # set up base plot if (p.shape) p <- ggplot(ff, aes_string(x = "term", y = "estimate", colour = "group", shape = "p.stars")) else p <- ggplot(ff, aes_string(x = "term", y = "estimate", colour = "group")) p <- p + layer_vertical_line + geom_point(position = position_dodge(spacing), size = dot.size) + geom_errorbar( aes_string(ymin = "conf.low", ymax = "conf.high"), position = position_dodge(spacing), width = 0, size = line.size ) + coord_flip() + guides(colour = guide_legend(reverse = TRUE)) # show different shapes depending on p-value if (p.shape) p <- p + scale_shape_manual(values = c(1, 16, 17, 15)) # add value labels if (show.values) p <- p + geom_text( aes_string(label = "p.label"), position = position_dodge(spacing), vjust = spacing * -1.5, hjust = -.1, show.legend = FALSE, size = value.size ) # check axis labels if (is.null(axis.labels) && isTRUE(auto.label)) axis.labels <- sjlabelled::term_labels(input_list, prefix = prefix.labels) # set axis labels p <- p + scale_x_discrete(labels = sjmisc::word_wrap(axis.labels, wrap = wrap.labels)) # hide legend? if (!show.legend) p <- p + guides(colour = "none", shape = "none") # facets if (grid) p <- p + facet_grid(~group) # we need transformed scale for exponentiated estimates if (isTRUE(tf == "exp")) { p <- p + scale_y_continuous( trans = "log10", limits = axis.scaling$axis.lim, breaks = axis.scaling$ticks, labels = prettyNum ) } else { p <- p + scale_y_continuous( limits = axis.scaling$axis.lim, breaks = axis.scaling$ticks, labels = axis.scaling$ticks ) } # set colors p <- p + scale_colour_manual(values = col_check2(colors, length(m.labels))) # set axis and plot titles p <- p + labs( x = NULL, y = sjmisc::word_wrap(estimate_axis_title(input_list[[1]], axis.title, type = "est", transform = !is.null(tf)), wrap = wrap.title), title = sjmisc::word_wrap(title, wrap = wrap.title), colour = sjmisc::word_wrap(legend.title, wrap = wrap.legend.title), shape = sjmisc::word_wrap(legend.pval.title, wrap = wrap.legend.title) ) p } sjPlot/R/helpfunctions.R0000644000176200001440000002603713644121135014742 0ustar liggesusers# Help-functions # evaluates arguments get_dot_data <- function(data, dots) { # any dots? 
if (length(dots) > 0) # get variable names vars <- dot_names(dots) else vars <- NULL # check if data is a data frame if (is.data.frame(data)) { # get valid variable names vars <- vars[vars %in% colnames(data)] vars.is.empty <- sjmisc::is_empty(vars) if (!is.null(vars) && !vars.is.empty) # select variables, if any x <- data[, vars, drop = FALSE] else # else return complete data frame x <- data } x } # return names of objects passed as ellipses argument dot_names <- function(dots) unname(unlist(lapply(dots, as.character))) #' @importFrom dplyr quos select get_dplyr_dot_data <- function(x, qs) { if (sjmisc::is_empty(qs)) x else suppressMessages(dplyr::select(x, !!!qs)) } # add annotations with table summary # here we print out total N of cases, chi-square and significance of the table print.table.summary <- function(baseplot, modsum, summary.pos = "r") { if (!is.null(modsum)) { # add annotations with table summary # here we print out total N of cases, chi-square and significance of the table if (summary.pos == "r") { t.hjust <- "top" x.x <- Inf } else { t.hjust <- "bottom" x.x <- -Inf } baseplot <- baseplot + annotate( "text", label = modsum, parse = TRUE, x = x.x, y = Inf, vjust = "top", hjust = t.hjust ) } baseplot } get_var_name <- function(x) { if (is.null(x)) return(NULL) # remove "data frame name" dollar_pos <- regexpr("$", x, fixed = T)[1] if (dollar_pos != -1) x <- substr(x, start = dollar_pos + 1, stop = nchar(x)) x } # Create frequency data frame of a variable # for sjp and sjt frq functions #' @importFrom stats na.omit ftable na.pass #' @importFrom tidyr spread create.xtab.df <- function(x, grp, round.prz = 2, na.rm = FALSE, weight.by = NULL) { # ------------------------------ # convert to labels # ------------------------------ x_full <- suppressWarnings(sjmisc::to_label(x, add.non.labelled = T)) grp_full <- suppressWarnings(sjmisc::to_label(grp, add.non.labelled = T)) # ------------------------------ # create frequency crosstable. we need to convert # vector to labelled factor first. 
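# weights, if supplied, enter the table via xtabs(); whether an "NA" category is kept is controlled by na.rm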
# ------------------------------ if (is.null(weight.by)) { if (na.rm) { mydat <- stats::ftable(table(x_full, grp_full)) } else { mydat <- stats::ftable(table(x_full, grp_full, useNA = "always")) } } else { if (na.rm) mydat <- stats::ftable(round(stats::xtabs(weight.by ~ x_full + grp_full)), 0) else mydat <- stats::ftable(round(stats::xtabs(weight.by ~ x_full + grp_full, exclude = NULL, na.action = stats::na.pass)), 0) } # create proportional tables, cell values ori.cell.values <- 100 * prop.table(mydat) proptab.cell <- round(100 * prop.table(mydat), round.prz) # create proportional tables, row percentages, including total row proptab.row <- rbind( as.data.frame(as.matrix(round(100 * prop.table(mydat, 1), round.prz))), round(colSums(ori.cell.values), round.prz) ) rownames(proptab.row)[nrow(proptab.row)] <- "total" proptab.row <- as.data.frame(apply(proptab.row, c(1, 2), function(x) if (is.na(x)) x <- 0 else x)) # create proportional tables, column percentages, including total row proptab.col <- cbind( as.data.frame(as.matrix(round(100 * prop.table(mydat, 2), round.prz))), round(rowSums(ori.cell.values), round.prz) ) colnames(proptab.col)[ncol(proptab.col)] <- "total" proptab.col <- as.data.frame(apply(proptab.col, c(1, 2), function(x) if (is.na(x)) x <- 0 else x)) # add total row and column to cell percentages afterwards proptab.cell <- rbind( as.data.frame(as.matrix(proptab.cell)), round(colSums(ori.cell.values), round.prz) ) proptab.cell <- cbind( as.data.frame(as.matrix(proptab.cell)), rowSums(proptab.cell) ) # due to roundings, total might differ from 100%, so clean this here proptab.cell[nrow(proptab.cell), ncol(proptab.cell)] <- 100 colnames(proptab.cell)[ncol(proptab.cell)] <- "total" rownames(proptab.cell)[nrow(proptab.cell)] <- "total" # convert to data frame mydat <- data.frame(mydat) colnames(mydat)[2] <- "Var2" # spread variables back, so we have a table again mydat <- tidyr::spread(mydat, .data$Var2, .data$Freq) # rename column names colnames(mydat)[1] <- "label" colnames(mydat)[is.na(colnames(mydat))] <- "NA" colnames(mydat)[colnames(mydat) == ""] <- "NA" # label must be character mydat$label <- as.character(mydat$label) mydat$label[is.na(mydat$label)] <- "NA" # save labels to extra vector labels.cnt <- mydat$label labels.grp <- colnames(mydat)[-1] # return result invisible(structure(list(mydat = mydat, proptab.cell = proptab.cell, proptab.col = proptab.col, proptab.row = proptab.row, labels.cnt = labels.cnt, labels.grp = labels.grp))) } # check character encoding for HTML-tables # (sjt-functions) get.encoding <- function(encoding, data = NULL) { if (is.null(encoding)) { if (!is.null(data) && is.data.frame(data)) { # get variable label labs <- sjlabelled::get_label(data[[1]]) # check if vectors of data frame have # any valid label. else, default to utf-8 if (!is.null(labs) && is.character(labs)) encoding <- Encoding(sjlabelled::get_label(data[[1]])) else encoding <- "UTF-8" # unknown encoding? 
default to utf-8 if (encoding == "unknown") encoding <- "UTF-8" } else if (.Platform$OS.type == "unix") encoding <- "UTF-8" else encoding <- "Windows-1252" } return(encoding) } # Calculate statistics of cross tabs #' @importFrom sjstats cramer phi table_values #' @importFrom stats chisq.test fisher.test xtabs crosstabsum <- function(x, grp, weight.by) { # -------------------------------------------------------- # check p-value-style option # -------------------------------------------------------- opt <- getOption("p_zero") if (is.null(opt) || opt == FALSE) { p_zero <- "" } else { p_zero <- "0" } if (is.null(weight.by)) { ftab <- table(x, grp) } else { ftab <- round(stats::xtabs(weight.by ~ x + grp), 0) } # calculate chi square value chsq <- stats::chisq.test(ftab) p.value <- chsq$p.value tab <- sjstats::table_values(ftab) # do we have cells with less than 5 observations? if (min(tab$expected) < 5 || (min(tab$expected) < 10 && chsq$parameter == 1)) { fish <- stats::fisher.test(ftab, simulate.p.value = (nrow(ftab) > 2 || ncol(ftab) > 2)) p.value <- fish$p.value } else { fish <- NULL } # pvalue in string if (p.value < 0.001) pvas <- sprintf("%s.001", p_zero) else pvas <- sub("0", p_zero, sprintf("%.3f", p.value)) # check whether variables are dichotomous or if they have more # than two categories. if they have more, use Cramer's V to calculate # the contingency coefficient if (nrow(ftab) > 2 || ncol(ftab) > 2) { # check whether fisher's test or chi-squared should be printed if (is.null(fish)) { modsum <- as.character(as.expression( substitute("N" == tn * "," ~~ chi^2 == c2 * "," ~~ "df" == dft * "," ~~ phi[c] == kook * "," ~~ "p" == pva, list(tn = summary(ftab)$n.cases, c2 = sprintf("%.2f", chsq$statistic), dft = c(chsq$parameter), kook = sprintf("%.2f", sjstats::cramer(ftab)), pva = pvas)))) } else { modsum <- as.character(as.expression( substitute("N" == tn * "," ~~ "df" == dft * "," ~~ phi[c] == kook * "," ~~ "Fisher's p" == pva, list(tn = summary(ftab)$n.cases, dft = c(chsq$parameter), kook = sprintf("%.2f", sjstats::cramer(ftab)), pva = pvas)))) } # if variables have two categories (2x2 table), use phi to calculate # the degree of association } else { # check whether fisher's test or chi-squared should be printed if (is.null(fish)) { modsum <- as.character(as.expression( substitute("N" == tn * "," ~~ chi^2 == c2 * "," ~~ "df" == dft * "," ~~ phi == kook * "," ~~ "p" == pva, list(tn = summary(ftab)$n.cases, c2 = sprintf("%.2f", chsq$statistic), dft = c(chsq$parameter), kook = sprintf("%.2f", sjstats::phi(ftab)), pva = pvas)))) } else { modsum <- as.character(as.expression( substitute("N" == tn * "," ~~ "df" == dft * "," ~~ phi == kook * "," ~~ "Fisher's p" == pva, list(tn = summary(ftab)$n.cases, dft = c(chsq$parameter), kook = sprintf("%.2f", sjstats::phi(ftab)), pva = pvas)))) } } return(modsum) } # Computes rotated factor loadings of a principal component analysis # (parameter "data") with a given number of factors (parameter "factors"), # based on a varimax rotation # # Parameters: # - data: the results (object) from a principal component analysis # (prcomp(myData...)) # - factors: the number of factors, which 
can be calculated from the # function "factorcount" below #' @importFrom stats varimax varimaxrota <- function(data, factors) { # compute the factor loadings # the factor loadings are obtained by multiplying the eigenvectors # with the diagonal matrix of the reported standard deviations ladungen <- data$rotation %*% diag(data$sdev) # to run the varimax rotation, we create a matrix with the factor loadings # of the selected factors (number = parameter "factors") # run the varimax rotation varib <- stats::varimax(ladungen[, seq_len(factors)]) varib } # unlist labels # Help function that unlists a list into a vector unlistlabels <- function(lab) { dummy <- unlist(lab) labels <- c() labels <- c(labels, as.character(dummy)) return(labels) } sju.rmspc <- function(html.table) { cleaned <- gsub("      <", "<", html.table, fixed = TRUE, useBytes = TRUE) cleaned <- gsub("    <", "<", cleaned, fixed = TRUE, useBytes = TRUE) cleaned <- gsub("  <", "<", cleaned, fixed = TRUE, useBytes = TRUE) return(cleaned) } .is_false <- function(x) { is.logical(x) && length(x) == 1L && !is.na(x) && !x } sjPlot/R/sjplot.R0000644000176200001440000002434413611404334013372 0ustar liggesusers#' @title Wrapper to create plots and tables within a pipe-workflow #' @name sjplot #' #' @description This function has a pipe-friendly argument-structure, with the #' first argument always being the data, followed by variables that #' should be plotted or printed as a table. The function then transforms #' the input and calls the requested sjp.- or sjt.-function #' to create a plot or table. \cr \cr #' Both \code{sjplot()} and \code{sjtab()} support grouped data frames. #' #' @param data A data frame. May also be a grouped data frame (see 'Note' and #' 'Examples'). #' @param ... Names of variables that should be plotted, and also further #' arguments passed down to the \pkg{sjPlot}-functions. See 'Examples'. #' @param fun Plotting function. Refers to the function name of \pkg{sjPlot}-functions. #' See 'Details' and 'Examples'. #' #' @return See related sjp. and sjt.-functions. #' #' @note The \code{...}-argument is used, first, to specify the variables from \code{data} #' that should be plotted, and, second, to name further arguments that are #' used in the subsequent plotting functions. Refer to the online-help of #' supported plotting-functions to see valid arguments. #' \cr \cr #' \code{data} may also be a grouped data frame (see \code{\link[dplyr]{group_by}}) #' with up to two grouping variables. Plots are then created for each subgroup. #' #' @details The following \code{fun}-values are currently supported: #' \describe{ #' \item{\code{"aov1"}}{calls \code{\link{sjp.aov1}}. The first #' two variables in \code{data} are used (and required) to create the plot. #' } #' \item{\code{"grpfrq"}}{calls \code{\link{plot_grpfrq}}. The first #' two variables in \code{data} are used (and required) to create the plot. #' } #' \item{\code{"likert"}}{calls \code{\link{plot_likert}}. \code{data} #' must be a data frame with items to plot. #' } #' \item{\code{"stackfrq"}}{calls \code{\link{tab_stackfrq}}. #' \code{data} must be a data frame with items to create the table. #' } #' \item{\code{"xtab"}}{calls \code{\link{plot_xtab}} or \code{\link{tab_xtab}}. #' The first two variables in \code{data} are used (and required) #' to create the plot or table. 
#' } #' } #' #' @examples #' library(dplyr) #' data(efc) #' #' # Grouped frequencies #' efc %>% sjplot(e42dep, c172code, fun = "grpfrq") #' #' # Grouped frequencies, as box plots #' efc %>% sjplot(e17age, c172code, fun = "grpfrq", #' type = "box", geom.colors = "Set1") #' #' \dontrun{ #' # table output of grouped data frame #' efc %>% #' group_by(e16sex, c172code) %>% #' select(e42dep, n4pstu, e16sex, c172code) %>% #' sjtab(fun = "xtab", use.viewer = FALSE) # open all tables in browser} #' #' @importFrom sjmisc is_empty #' @importFrom sjlabelled copy_labels get_label get_labels #' @importFrom dplyr filter #' @importFrom tidyr nest #' @importFrom stats complete.cases #' @export sjplot <- function(data, ..., fun = c("grpfrq", "xtab", "aov1", "likert")) { # check if x is a data frame if (!is.data.frame(data)) stop("`data` must be a data frame.", call. = F) # match arguments fun <- match.arg(fun) # evaluate arguments, generate data x <- get_dot_data(data, match.call(expand.dots = FALSE)$`...`) # check remaining arguments args <- match.call(expand.dots = FALSE)$`...` args <- args[names(args) != ""] p <- NULL pl <- NULL # do we have a grouped data frame? if (inherits(x, "grouped_df")) { # get grouped data grps <- get_grouped_data(x) # now plot everything for (i in seq_len(nrow(grps))) { # copy back labels to grouped data frame tmp <- sjlabelled::copy_labels(grps$data[[i]], x) # prepare argument list, including title tmp.args <- get_grouped_title(x, grps, args, i, sep = "\n") # plot plots <- plot_sj(tmp, fun, tmp.args) # add plots, check for NULL results if (!is.null(plots$p)) pl <- c(pl, list(plots$p)) if (!is.null(plots$pl)) pl <- c(pl, plots$pl) } } else { # plot plots <- plot_sj(x, fun, args) # we only have one plot call p <- plots$p pl <- plots$pl } # print all plots if (!is.null(pl)) { for (p in pl) suppressWarnings(graphics::plot(p)) invisible(pl) } else { suppressWarnings(graphics::plot(p)) invisible(p) } } #' @rdname sjplot #' @export sjtab <- function(data, ..., fun = c("xtab", "stackfrq")) { # check if x is a data frame if (!is.data.frame(data)) stop("`data` must be a data frame.", call. = F) # match fun-arguments fun <- match.arg(fun) # evaluate arguments, generate data x <- get_dot_data(data, match.call(expand.dots = FALSE)$`...`) tabs.list <- list() # check remaining arguments args <- match.call(expand.dots = FALSE)$`...` args <- args[names(args) != ""] # do we have a grouped data frame? if (inherits(x, "grouped_df")) { # get grouped data grps <- get_grouped_data(x) # now plot everything for (i in seq_len(nrow(grps))) { # copy back labels to grouped data frame tmp <- sjlabelled::copy_labels(grps$data[[i]], x) # prepare argument list, including title tmp.args <- get_grouped_title(x, grps, args, i, sep = "
") # table tl <- tab_sj(tmp, fun, tmp.args) # save list tabs.list[[length(tabs.list) + 1]] <- tl } final.table <- paste0( tl$header, tl$page.style, "\n\n\n" ) final.knitr <- "" # iterate table list for (i in seq_len(length(tabs.list))) { final.table <- paste0(final.table, tabs.list[[i]]$page.content, sep = "\n
<p>&nbsp;</p><hr><p>&nbsp;</p>
\n") final.knitr <- paste0(final.knitr, tabs.list[[i]]$knitr, sep = "\n
<p>&nbsp;</p><hr><p>&nbsp;</p>
\n") } # close html tags final.table <- paste0(final.table, "\n\n") # return all tables return(structure( class = c("sjTable", "sjtab"), list( page.style = tl$page.style, header = tl$header, page.content = final.table, page.complete = final.table, knitr = final.knitr, file = eval(args[["file"]]), viewer = if (is.null(args[["use.viewer"]])) TRUE else eval(args[["use.viewer"]]) ) )) } else { # plot tab_sj(x, fun, args) } } get_grouped_plottitle <- function(x, grps, i, sep = "\n") { # prepare title for group tp <- get_title_part(x, grps, 1, i) title <- sprintf("%s: %s", tp[1], tp[2]) # do we have another groupng variable? if (length(dplyr::group_vars(x)) > 1) { # prepare title for group tp <- get_title_part(x, grps, 2, i) title <- sprintf("%s%s%s: %s", title, sep, tp[1], tp[2]) } title } get_grouped_title <- function(x, grps, args, i, sep = "\n") { # prepare title for group tp <- get_title_part(x, grps, 1, i) title <- sprintf("%s: %s", tp[1], tp[2]) # do we have another groupng variable? if (length(dplyr::group_vars(x)) > 1) { # prepare title for group tp <- get_title_part(x, grps, 2, i) title <- sprintf("%s%s%s: %s", title, sep, tp[1], tp[2]) } # add title argument to argument list c(args, `title` = title) } #' @importFrom sjlabelled get_values get_label get_labels get_title_part <- function(x, grps, level, i) { # prepare title for group var.name <- colnames(grps)[level] # get values from value labels vals <- sjlabelled::get_values(x[[var.name]]) # if we have no value labels, get values directly if (is.null(vals)) vals <- unique(x[[var.name]]) # find position of value labels for current group lab.pos <- which(vals == grps[[var.name]][i]) # get variable and value labels t1 <- sjlabelled::get_label(x[[var.name]], def.value = var.name) t2 <- sjlabelled::get_labels(x[[var.name]])[lab.pos] # if we have no value label, use value instead if (is.null(t2)) t2 <- vals[lab.pos] # generate title c(t1, t2) } #' @importFrom rlang .data #' @importFrom dplyr select filter group_modify group_vars #' @importFrom stats complete.cases #' get_grouped_data <- function(x) { # retain observations that are complete wrt grouping vars, then nest grps <- x %>% dplyr::group_modify(~ dplyr::filter(.x, stats::complete.cases(.y))) %>% tidyr::nest() # arrange data if (length(dplyr::group_vars(x)) == 1) reihe <- order(grps[[1]]) else reihe <- order(grps[[1]], grps[[2]]) grps <- grps[reihe, ] grps } plot_sj <- function(x, fun, args) { p <- NULL pl <- NULL # choose plottype, and call plot-function with or w/o additional arguments if (sjmisc::is_empty(args)) { if (fun == "grpfrq") { p <- plot_grpfrq(x[[1]], x[[2]]) } else if (fun == "likert") { p <- plot_likert(x) } else if (fun == "xtab") { p <- plot_xtab(x[[1]], x[[2]]) } else if (fun == "aov1") { p <- sjp.aov1(x[[1]], x[[2]]) } } else { if (fun == "grpfrq") { p <- do.call(plot_grpfrq, args = c(list(var.cnt = x[[1]], var.grp = x[[2]]), args)) } else if (fun == "likert") { p <- do.call(plot_likert, args = c(list(items = x), args)) } else if (fun == "xtab") { p <- do.call(plot_xtab, args = c(list(x = x[[1]], grp = x[[2]]), args)) } else if (fun == "aov1") { p <- do.call(sjp.aov1, args = c(list(var.dep = x[[1]], var.grp = x[[2]]), args)) } } list(p = p, pl = pl) } tab_sj <- function(x, fun, args) { # choose plottype, and call plot-function with or w/o additional arguments if (sjmisc::is_empty(args)) { if (fun == "xtab") { tab_xtab(x[[1]], x[[2]]) } else if (fun == "stackfrq") { tab_stackfrq(x) } } else { if (fun == "stackfrq") { do.call(tab_stackfrq, args = c(list(items = x), 
args)) } else if (fun == "xtab") { do.call(tab_xtab, args = c(list(var.row = x[[1]], var.col = x[[2]]), args)) } } } sjPlot/R/sjPlotPolynomials.R0000644000176200001440000002570613612122336015564 0ustar liggesusers#' @title Plot polynomials for (generalized) linear regression #' @name sjp.poly #' #' @description This function plots a scatter plot of a term \code{poly.term} #' against a response variable \code{x} and adds - depending on #' the amount of numeric values in \code{poly.degree} - multiple #' polynomial curves. A loess-smoothed line can be added to see #' which of the polynomial curves fits best to the data. #' #' @param x A vector, representing the response variable of a linear (mixed) model; or #' a linear (mixed) model as returned by \code{\link{lm}} or \code{\link[lme4]{lmer}}. #' @param poly.term If \code{x} is a vector, \code{poly.term} should also be a vector, representing #' the polynomial term (independent variabl) in the model; if \code{x} is a #' fitted model, \code{poly.term} should be the polynomial term's name as character string. #' See 'Examples'. #' @param poly.degree Numeric, or numeric vector, indicating the degree of the polynomial. #' If \code{poly.degree} is a numeric vector, multiple polynomial curves for #' each degree are plotted. See 'Examples'. #' @param poly.scale Logical, if \code{TRUE}, \code{poly.term} will be scaled before #' linear regression is computed. Default is \code{FALSE}. Scaling the polynomial #' term may have an impact on the resulting p-values. #' @param fun Linear function when modelling polynomial terms. Use \code{fun = "lm"} #' for linear models, or \code{fun = "glm"} for generalized linear models. #' When \code{x} is not a vector, but a fitted model object, the function #' is detected automatically. If \code{x} is a vector, \code{fun} defaults #' to \code{"lm"}. #' @param show.loess Logical, if \code{TRUE}, an additional loess-smoothed line is plotted. #' @param show.loess.ci Logical, if \code{TRUE}, a confidence region for the loess-smoothed line #' will be plotted. #' @param show.p Logical, if \code{TRUE} (default), p-values for polynomial terms are #' printed to the console. #' @param loess.color Color of the loess-smoothed line. Only applies, if \code{show.loess = TRUE}. #' @param show.scatter Logical, if TRUE (default), adds a scatter plot of data #' points to the plot. #' @param point.alpha Alpha value of point-geoms in the scatter plots. Only #' applies, if \code{show.scatter = TRUE}. #' @param point.color Color of of point-geoms in the scatter plots. Only applies, #' if \code{show.scatter = TRUE.} #' #' @return A ggplot-object. #' #' #' @inheritParams plot_model #' @inheritParams plot_scatter #' @inheritParams plot_grpfrq #' #' @details For each polynomial degree, a simple linear regression on \code{x} (resp. #' the extracted response, if \code{x} is a fitted model) is performed, #' where only the polynomial term \code{poly.term} is included as independent variable. #' Thus, \code{lm(y ~ x + I(x^2) + ... + I(x^i))} is repeatedly computed #' for all values in \code{poly.degree}, and the predicted values of #' the reponse are plotted against the raw values of \code{poly.term}. #' If \code{x} is a fitted model, other covariates are ignored when #' finding the best fitting polynomial. \cr \cr #' This function evaluates raw polynomials, \emph{not orthogonal} polynomials. #' Polynomials are computed using the \code{\link{poly}} function, #' with argument \code{raw = TRUE}. 
\cr \cr #' To find out which polynomial degree fits best to the data, a loess-smoothed #' line (in dark grey) can be added (with \code{show.loess = TRUE}). The polynomial curves #' that comes closest to the loess-smoothed line should be the best #' fit to the data. #' #' @examples #' library(sjmisc) #' data(efc) #' # linear fit. loess-smoothed line indicates a more #' # or less cubic curve #' sjp.poly(efc$c160age, efc$quol_5, 1) #' #' # quadratic fit #' sjp.poly(efc$c160age, efc$quol_5, 2) #' #' # linear to cubic fit #' sjp.poly(efc$c160age, efc$quol_5, 1:4, show.scatter = FALSE) #' #' #' # fit sample model #' fit <- lm(tot_sc_e ~ c12hour + e17age + e42dep, data = efc) #' # inspect relationship between predictors and response #' plot_model(fit, type = "slope") #' # "e17age" does not seem to be linear correlated to response #' # try to find appropiate polynomial. Grey line (loess smoothed) #' # indicates best fit. Looks like x^4 has the best fit, #' # however, only x^3 has significant p-values. #' sjp.poly(fit, "e17age", 2:4, show.scatter = FALSE) #' #' \dontrun{ #' # fit new model #' fit <- lm(tot_sc_e ~ c12hour + e42dep + e17age + I(e17age^2) + I(e17age^3), #' data = efc) #' # plot marginal effects of polynomial term #' plot_model(fit, type = "pred", terms = "e17age")} #' #' @import ggplot2 #' @importFrom scales grey_pal brewer_pal #' @importFrom stats lm glm binomial predict poly #' @importFrom graphics plot #' @export sjp.poly <- function(x, poly.term, poly.degree, poly.scale = FALSE, fun = NULL, axis.title = NULL, geom.colors = NULL, geom.size = .8, show.loess = TRUE, show.loess.ci = TRUE, show.p = TRUE, show.scatter = TRUE, point.alpha = .2, point.color = "#404040", loess.color = "#808080") { # -------------------------------------------- # check color parameter # -------------------------------------------- geom.colors <- col_check2(geom.colors, length(poly.degree)) # -------------------------------------------- # check poly.term parameter # -------------------------------------------- if (is.character(poly.term)) defv <- poly.term else defv <- get_var_name(deparse(substitute(poly.term))) # -------------------------------------------- # parameter check: fitted model or variables? # -------------------------------------------- if (!is.vector(x) && !is.numeric(x) && !is.factor(x)) { mf <- insight::get_data(x) # retrieve response vector resp <- insight::get_response(x) # retrieve polynomial term poly.term <- mf[[poly.term]] } else { resp <- x } # -------------------------------------------- # check for glm or lm # -------------------------------------------- if (is.null(fun)) { if (inherits(x, c("glmerMod", "glm"))) { fun <- "glm" } else { fun <- "lm" } } # -------------------------------------------- # retrieve labels # -------------------------------------------- if (is.null(axis.title)) axis.title <- sjlabelled::get_label(poly.term, def.value = defv) axisTitle.y <- sjlabelled::get_label(resp, def.value = "Response") # -------------------------------------------- # init data frame # -------------------------------------------- plot.df <- data.frame() # scale polynomial term? 
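  # scale() with its defaults (center = TRUE, scale = TRUE) z-standardizes the term
  # (mean 0, sd 1); as noted in the documentation above, this may change the p-values
  # of the raw polynomial terms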
if (poly.scale) poly.term <- scale(poly.term) # -------------------------------------------- # get cutpoints for loess curve # -------------------------------------------- # cutpoints <- get_loess_cutpoints(stats::na.omit(data.frame(x = poly.term, y = resp))) # -------------------------------------------- # if user wants to plot multiple curves for # polynomials, create data frame for each curve here # -------------------------------------------- for (i in poly.degree) { # poly-function can't cope with missings, so remove them here mydat <- stats::na.omit(data.frame(x = poly.term, y = resp)) # fit model with polynomials if (fun == "lm") fit <- stats::lm(mydat$y ~ stats::poly(mydat$x, i, raw = TRUE)) else fit <- stats::glm(mydat$y ~ stats::poly(mydat$x, i, raw = TRUE), family = stats::family(x)) # check whether we have an integer poly.degree # or a float value poly.digit <- ifelse(i %% 1 == 0, 0, 1) # create data frame with raw data and the fitted poly-curve plot.df <- rbind(plot.df, cbind(mydat, stats::predict(fit), sprintf("x^%.*f", poly.digit, i))) # print p-values? if (show.p) { # get p-values pvals <- summary(fit)$coefficients[-1, 4] # prepare output string p.out <- sprintf("Polynomial degrees: %.*f\n---------------------\n", poly.digit, i) # iterate polynomial terms and print p-value for each polynom for (j in seq_len(i)) p.out <- paste0(p.out, sprintf("p(x^%i): %.3f\n", j, unname(pvals[j]))) # add separator line after each model p.out <- paste0(p.out, "\n") # print p-values for fitted model cat(p.out) } } # name df colnames(plot.df) <- c("x","y", "pred", "grp") # create plot polyplot <- ggplot(plot.df, aes_string(x = "x", y = "y", colour = "grp")) # show scatter plot as well? if (show.scatter) polyplot <- polyplot + geom_jitter(colour = point.color, alpha = point.alpha, shape = 16) # show loess curve? this curve indicates the "perfect" curve through # the data if (show.loess) polyplot <- polyplot + stat_smooth(method = "loess", color = loess.color, se = show.loess.ci, size = geom.size) # add curves for polynomials polyplot <- polyplot + geom_line(aes_string(y = "pred"), size = geom.size) + scale_color_manual(values = geom.colors, labels = lapply(poly.degree, function(j) bquote(x^.(j)))) + labs(x = axis.title, y = axisTitle.y, colour = "Polynomial\ndegrees") polyplot } #' @importFrom stats loess predict get_loess_cutpoints <- function(mydat) { # sort data frame by x-values mydat <- mydat[order(mydat$x), ] # fit loess fit <- stats::loess(y ~ x, mydat) # get predicted values preds <- unique(stats::predict(fit)) xuni <- unique(mydat$x) # define counter cnt <- 1 cutpoints <- c() xvals <- c() # initial direction for finding first cutpoint? direction <- ifelse(preds[cnt + 1] > preds[cnt], "up", "down") # "follow" path of loess line until cutpoint # then save value and change direction while (cnt < length(preds)) { if (direction == "up") { if (preds[cnt + 1] < preds[cnt]) { direction <- "down" cutpoints <- c(cutpoints, preds[cnt]) xvals <- c(xvals, xuni[cnt]) } } else { if (preds[cnt + 1] > preds[cnt]) { direction <- "up" cutpoints <- c(cutpoints, preds[cnt]) xvals <- c(xvals, xuni[cnt]) } } cnt <- cnt + 1 } data.frame(cutpoint.x = xvals, cutpoint.y = cutpoints) } sjPlot/R/plot_likert.R0000644000176200001440000007537613662304072014426 0ustar liggesusers#' @title Plot likert scales as centered stacked bars #' @name plot_likert #' #' @description Plot likert scales as centered stacked bars. 
#' #' @note Note that only even numbers of categories are possible to plot, so the "positive" #' and "negative" values can be splitted into two halfs. A neutral category (like "don't know") #' can be used, but must be indicated by \code{cat.neutral}. \cr \cr #' The \code{catcount}-argument indicates how many item categories are in the #' Likert scale. Normally, this argument can be ignored because the amount of #' valid categories is retrieved automatically. However, sometimes (for instance, #' if a certain category is missing in all items), auto-detection of the amount #' of categories fails. In such cases, specify the amount of categories #' with the \code{catcount}-argument. #' #' @param catcount optional, amount of categories of \code{items} (e.g. \emph{"strongly disagree", #' "disagree", "agree"} and \emph{"strongly agree"} would be \code{catcount = 4}). #' Note that this argument only applies to "valid" answers, i.e. if you #' have an additional neutral category (see \code{cat.neutral}) like \emph{"don't know"}, #' this won't count for \code{catcount} (e.g. "strongly disagree", #' "disagree", "agree", "strongly agree" and neutral category "don't know" #' would still mean that \code{catcount = 4}). See 'Note'. #' @param cat.neutral If there's a neutral category (like "don't know" etc.), specify #' the index number (value) for this category. Else, set \code{cat.neutral = NULL} (default). #' The proportions of neutral category answers are plotted as grey bars on the left side of #' the figure. #' @param sort.frq Indicates whether the items of \code{items} should be ordered by #' total sum of positive or negative answers. #' \describe{ #' \item{\code{"pos.asc"}}{to order ascending by sum of positive answers} #' \item{\code{"pos.desc"}}{to order descending by sum of positive answers} #' \item{\code{"neg.asc"}}{for sorting ascending negative answers} #' \item{\code{"neg.desc"}}{for sorting descending negative answers} #' \item{\code{NULL}}{(default) for no sorting} #' } #' @param reverse.colors logical, if \code{TRUE}, the color scale from \code{geom.colors} will be reversed, #' so positive and negative values switch colors. #' @param cat.neutral.color Color of the neutral category, if plotted (see \code{cat.neutral}). #' @param intercept.line.color Color of the vertical intercept line that divides positive and negative values. #' @param values Determines style and position of percentage value labels on the bars: #' \describe{ #' \item{\code{"show"}}{(default) shows percentage value labels in the middle of each category bar} #' \item{\code{"hide"}}{hides the value labels, so no percentage values on the bars are printed} #' \item{\code{"sum.inside"}}{shows the sums of percentage values for both negative and positive values and prints them inside the end of each bar} #' \item{\code{"sum.outside"}}{shows the sums of percentage values for both negative and positive values and prints them outside the end of each bar} #' } #' @param show.prc.sign logical, if \code{TRUE}, \%-signs for value labels are shown. #' @param grid.range Numeric, limits of the x-axis-range, as proportion of 100. #' Default is 1, so the x-scale ranges from zero to 100\% on both sides from the center. #' Can alternatively be supplied as a vector of 2 positive numbers (e.g. \code{grid.range = c(1, .8)}) #' to set the left and right limit separately. You can use values beyond 1 (100\%) in case bar labels are not printed because #' they exceed the axis range. E.g. 
\code{grid.range = 1.4} will set the axis from -140 to +140\%, however, only #' (valid) axis labels from -100 to +100\% are printed. Neutral categories are adjusted to the most left limit. #' @param reverse.scale logical, if \code{TRUE}, the ordering of the categories is reversed, so positive and negative values switch position. #' @param groups (optional) Must be a vector of same length as \code{ncol(items)}, #' where each item in this vector represents the group number #' of the related columns of \code{items}. See 'Examples'. #' @param groups.titles (optional, only used if groups are supplied) Titles for each factor group that will be used as table caption for each #' component-table. Must be a character vector of same length as \code{length(unique(groups))}. #' Default is \code{"auto"}, which means that each table has a standard caption \emph{Component x}. #' Use \code{NULL} to use names as supplied to \code{groups} and use \code{FALSE} to suppress table captions. #' @param sort.groups (optional, only used if groups are supplied) logical, if groups should be sorted according to the values supplied to \code{groups}. Defaults to \code{TRUE}. #' @param legend.pos (optional, only used if groups are supplied) Defines the legend position. Possible values are \code{c("bottom", "top", "both", "all", "none")}. #' If the is only one group or this option is set to \code{"all"} legends will be printed as defined with \code{\link{set_theme}}. #' @param rel_heights (optional, only used if groups are supplied) This option can be used to adjust the height of the subplots. The bars in subplots can have different heights due to a differing number of items #' or due to legend placement. This can be adjusted here. Takes a vector of numbers, one #' for each plot. Values are evaluated relative to each other. #' @param group.legend.options (optional, only used if groups are supplied) List of options to be passed to \code{\link[ggplot2]{guide_legend}}. #' The most notable options are \code{byrow=T} (default), this will order the categories row wise. #' And with \code{group.legend.options = list(nrow = 1)} all categories can be forced to be on a single row. #' @param cowplot.options (optional, only used if groups are supplied) List of label options to be passed to \code{\link[cowplot]{plot_grid}}. #' #' @inheritParams plot_grpfrq #' @inheritParams plot_stackfrq #' @inheritParams plot_model #' #' @return A ggplot-object. 
#' #' @examples #' library(sjmisc) #' data(efc) #' # find all variables from COPE-Index, which all have a "cop" in their #' # variable name, and then plot that subset as likert-plot #' mydf <- find_var(efc, pattern = "cop", out = "df") #' #' plot_likert(mydf) #' #' plot_likert( #' mydf, #' grid.range = c(1.2, 1.4), #' expand.grid = FALSE, #' values = "sum.outside", #' show.prc.sign = TRUE #' ) #' #' # Plot in groups #' #' plot_likert(mydf, c(2,1,1,1,1,2,2,2,1)) #' #' if (require("parameters") && require("nFactors")) { #' groups <- parameters::principal_components(mydf) #' plot_likert(mydf, groups = parameters::closest_component(groups)) #' } #' #' plot_likert(mydf, #' c(rep("B", 4), rep("A", 5)), #' sort.groups = FALSE, #' grid.range = c(0.9, 1.1), #' geom.colors = "RdBu", #' rel_heights = c(6, 8), #' wrap.labels = 40, #' reverse.scale = TRUE) #' #' # control legend items #' six_cat_example = data.frame( #' matrix(sample(1:6, 600, replace = TRUE), #' ncol = 6) #' ) #' #' \dontrun{ #' six_cat_example <- #' six_cat_example %>% #' dplyr::mutate_all(~ordered(.,labels = c("+++","++","+","-","--","---"))) #' #' # Old default #' plot_likert( #' six_cat_example, #' groups = c(1, 1, 1, 2, 2, 2), #' group.legend.options = list(nrow = 2, byrow = FALSE) #' ) #' #' # New default #' plot_likert(six_cat_example, groups = c(1, 1, 1, 2, 2, 2)) #' #' # Single row #' plot_likert( #' six_cat_example, #' groups = c(1, 1, 1, 2, 2, 2), #' group.legend.options = list(nrow = 1) #' )} #' @import ggplot2 #' @importFrom stats na.omit xtabs #' @importFrom sjmisc is_odd set_na is_empty #' @importFrom sjlabelled as_numeric #' @importFrom purrr map flatten_dbl #' @importFrom dplyr between #' @export plot_likert <- function(items, groups = NULL, groups.titles = "auto", title = NULL, legend.title = NULL, # Options to be passed directly to .plot_likert() legend.labels = NULL, axis.titles = NULL, axis.labels = NULL, catcount = NULL, cat.neutral = NULL, sort.frq = NULL, weight.by = NULL, title.wtd.suffix = NULL, wrap.title = 50, wrap.labels = 30, wrap.legend.title = 30, wrap.legend.labels = 28, geom.size = .6, geom.colors = "BrBG", cat.neutral.color = "grey70", intercept.line.color = "grey50", reverse.colors = FALSE, values = "show", show.n = TRUE, show.legend = TRUE, show.prc.sign = FALSE, grid.range = 1, grid.breaks = 0.2, expand.grid = TRUE, digits = 1, reverse.scale = FALSE, coord.flip = TRUE, sort.groups = TRUE, # Group Options legend.pos = "bottom", rel_heights = 1, group.legend.options = list(nrow = NULL, byrow = TRUE), # Add rowwise order of levels and option to force a single rowed legend for 6 or more categories cowplot.options = list(label_x = 0.01, hjust = 0, align = "v") # Fix for label position depending on label length bug in cowplot ) { # Select options to be passed to .plot_likert() .likert_options <- as.list(environment())[5:32] ## If now no groups are supplied only 1 group will be assumed. Check for cowplot is only performed if there are groups supplied. if (is.null(groups)) { groups <- rep(1, length.out = ncol(items)) } else { if (!requireNamespace("cowplot", quietly = T)) stop("Package 'cowplot' required for this function wor work. Please install it.", call. = F) } if (ncol(items) != length(groups)) stop("Length of groups has to equal the number of items: ncol(items) != length(groups).", call. 
= F) # retrieve unique factor / group index values findex <- unique(groups) if (sort.groups) findex <- sort(findex) # Add empty title to plots, to create space for the group.labels if (is.null(title) & length(findex) != 1) title <- rep("", length(findex)) .plot_list <- list() # iterate all sub-plots (groups) for (i in seq_along(findex)) { index <- which(groups == findex[i]) .pl <- do.call(".plot_likert", args = c(list(items[, index], title = title[i]), .likert_options)) # If there are 2 or more groups, the legend will be plotted according to legend.pos. if (length(findex) != 1) { if (legend.pos %in% c("top", "both") & i == 1) .pl <- .pl + theme(legend.position = "top") + guides(fill = do.call(guide_legend, group.legend.options)) else if (legend.pos %in% c("bottom", "both") & i == length(findex)) .pl <- .pl + theme(legend.position = "bottom") + guides(fill = do.call(guide_legend, group.legend.options)) else if (legend.pos != "all") .pl <- .pl + theme(legend.position = "none") } .plot_list[i] <- list(.pl) } # Options to turn off or overwrite cowplot group.labels. if (.is_false(groups.titles)) { groups.titles <- rep("", length(findex)) } else if (!is.null(groups.titles) && (groups.titles[1] == "auto" || length(groups.titles) != length(findex)) && (is.numeric(groups))) { groups.titles <- sprintf("Component %i", seq_along(findex)) # For tab_itemscale compatibility } else if (length(groups.titles) != length(findex)) { groups.titles <- findex } # If groups were supplied, combine the subplots with cowplot::plot_grid() if (length(findex) == 1) { .out <- .plot_list[[1]] } else { .out <- do.call(get("plot_grid", asNamespace("cowplot")), args = c( list( "plotlist" = .plot_list, "labels" = groups.titles, "rel_heights" = rel_heights, "ncol" = 1 ), cowplot.options )) } .out } .plot_likert <- function(items, title = NULL, legend.title = NULL, legend.labels = NULL, axis.titles = NULL, axis.labels = NULL, catcount = NULL, cat.neutral = NULL, sort.frq = NULL, weight.by = NULL, title.wtd.suffix = NULL, wrap.title = 50, wrap.labels = 30, wrap.legend.title = 30, wrap.legend.labels = 28, geom.size = .6, geom.colors = "BrBG", cat.neutral.color = "grey70", intercept.line.color = "grey50", reverse.colors = FALSE, values = "show", show.n = TRUE, show.legend = TRUE, show.prc.sign = FALSE, grid.range = 1, grid.breaks = 0.2, expand.grid = TRUE, digits = 1, reverse.scale = FALSE, coord.flip = TRUE) { # check param. if we have a single vector instead of # a data frame with several items, convert vector to data frame if (!is.data.frame(items) && !is.matrix(items)) items <- as.data.frame(items) # if grid.range is supplied as 1 value, it is duplicated for symmetric results. This is for compatibillity with older versions. 
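  # e.g. grid.range = 1.2 is expanded to c(1.2, 1.2), i.e. the same limit on both sides of the center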
if (length(grid.range) == 1) grid.range <- c(grid.range, grid.range) # copy titles if (is.null(axis.titles)) { axisTitle.x <- NULL axisTitle.y <- NULL } else { axisTitle.x <- axis.titles[1] if (length(axis.titles) > 1) axisTitle.y <- axis.titles[2] else axisTitle.y <- NULL } # check sorting if (!is.null(sort.frq)) { if (sort.frq == "pos.asc") { sort.frq <- "pos" reverseOrder <- FALSE } if (sort.frq == "pos.desc") { sort.frq <- "pos" reverseOrder <- TRUE } if (sort.frq == "neg.asc") { sort.frq <- "neg" reverseOrder <- FALSE } if (sort.frq == "neg.desc") { sort.frq <- "neg" reverseOrder <- TRUE } } else { reverseOrder <- FALSE } # try to automatically set labels is not passed as argument if (is.null(legend.labels)) { legend.labels <- sjlabelled::get_labels( items[[1]], attr.only = F, values = NULL, non.labelled = T ) } if (is.null(axis.labels)) { # retrieve variable name attribute axis.labels <- unname(sjlabelled::get_label(items, def.value = colnames(items))) } # unname labels, if necessary, so we have a simple character vector if (!is.null(names(axis.labels))) axis.labels <- as.vector(axis.labels) if (!is.null(legend.labels)) { if (!is.null(names(legend.labels))) legend.labels <- as.vector(legend.labels) } # determine catcount adding <- ifelse(is.null(cat.neutral), 0, 1) if (is.null(catcount)) { # add new unique item values to catcount, so catcount # finally contains all unique values of items catcount <- items %>% purrr::map(~ stats::na.omit(unique(.x))) %>% purrr::flatten_dbl() %>% unique() %>% sort() neutral.between <- FALSE # remove neutral category if (!is.null(cat.neutral)) { # find neutral cat value in catcount ncv_pos <- which(catcount == cat.neutral) # if not empty, remove if (!sjmisc::is_empty(ncv_pos)) { catcount <- catcount[-ncv_pos] neutral.between <- dplyr::between(cat.neutral, min(catcount), max(catcount)) } } # detect range of valid categories, which # then equals catcount catcount <- max(catcount) - min(catcount) + 1 # check if category count matches category label count if (!is.null(legend.labels)) { # how many labels do we have? # substract 1, if we have neutral category lll <- length(legend.labels) - adding # catcount and legend label count equal? if (catcount < lll) { # warn user that detected amount of categories and supplied legend labels # are different. warning("Length of labels for item categories `legend.labels` differs from detected amount of categories. Use `catcount` argument to define amount of item categories, if plotting does not work.", call. = F) # adjust catcount to length of legend labels, because # we assume that labels represent the valid range of # item categories catcount <- lll } } # is catcount odd or even? make catcount even if (sjmisc::is_odd(catcount)) { # warn user about uneven category count, but only if # neutral category is not inside valid categories if (!neutral.between) warning("Detected uneven category count in items. Dropping last category.", call. 
= F) catcount <- catcount - 1 } } # set legend labels, if we have none yet if (is.null(legend.labels)) legend.labels <- seq_len(catcount + adding) # prepare data frames mydat.pos <- data.frame() mydat.neg <- data.frame() mydat.dk <- data.frame() freq.df <- data.frame() # If we have neutral category in between and not as last # category, recode neutral category to last category if (!is.null(cat.neutral) && cat.neutral <= catcount) { # first, each other category has to be moved down one position # therefore, we create a pattern with values from neutral # category to category count downvote <- seq(cat.neutral, catcount + 1, by = 1) # now we "shift" this value pattern and make a # string out of it recode.pattern <- paste0( paste0(sprintf("%i=%i", c(downvote[-1], downvote[1]), downvote), collapse = ";"), ";else=copy" ) # all factors with char labels need to be numeric, # else, recode won't work items <- purrr::modify_if( items, is_labelled_factor, sjlabelled::as_numeric, keep.labels = FALSE ) # finally, recode data items <- sjmisc::rec(items, rec = recode.pattern, append = FALSE) # re-order legend labels as well ll.order <- c(seq_len(catcount + adding)[-cat.neutral], cat.neutral) legend.labels <- legend.labels[ll.order] } # loop through all likert-items for (i in seq_len(ncol(items))) { # convert to numeric values if (!is.numeric(items[[i]])) { items[[i]] <- sjlabelled::as_numeric(items[[i]], keep.labels = F) } # If we don't plot neutral category, but item still contains # that category, replace it with NA if (is.null(cat.neutral) && max(items[[i]], na.rm = T) > catcount) items[[i]] <- sjmisc::set_na(items[[i]], na = catcount + 1, as.tag = F) # create proportional frequency table if (is.null(weight.by)) { tab <- round(prop.table(table(items[[i]])), digits + 3) } else { tab <- round(prop.table(stats::xtabs(weight.by ~ items[[i]])), digits + 3) } # retrieve category number and related frequencies counts <- as.numeric(tab) valid <- as.numeric(names(tab)) # create frequency vector, so zero-categories are cared for freq <- rep(0, catcount + adding) freq[valid] <- counts # append to data frame if (ncol(freq.df) == 0) freq.df <- as.data.frame(freq) else { # check for valid rows. if we hav missing categories # in all items, argument "catcount" must be set, because # automatic detection of amount of categories does not # work then. if (length(freq) != nrow(freq.df)) stop("Could not determine amount of item categories. Please use argument `catcount`.", call. = F) else freq.df <- as.data.frame(cbind(freq.df, freq)) } } # Check whether N of each item should be included into axis labels if (show.n) { for (i in seq_len(length(axis.labels))) { axis.labels[i] <- paste( axis.labels[i], sprintf(" (n=%i)", length(stats::na.omit(items[[i]]))), sep = "" ) } } # determine split between pos and neg values # lower.half <- rev(seq(catcount / 2)) lower.half <- rev(seq(ceiling(catcount / 2))) # upper.half <- 1 + catcount - lower.half upper.half <- setdiff(seq.int(catcount), lower.half) # sum up values to total, so we can sort items sums.lower <- unname(apply(freq.df[lower.half, , drop = FALSE], 2, sum)) sums.upper <- unname(apply(freq.df[upper.half, , drop = FALSE], 2, sum)) # sort items if (is.null(sort.frq)) sort.freq <- seq_len(ncol(freq.df)) else if (sort.frq == "pos") sort.freq <- order(sums.lower) else if (sort.frq == "neg") sort.freq <- order(sums.upper) else sort.freq <- seq_len(ncol(freq.df)) # reverse item order? 
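  # (reversing here means that, once coordinates are flipped further below, the items
  # appear top-down in their original resp. sorted order)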
if (!reverseOrder) sort.freq <- rev(sort.freq) # save summed up y-values, for label positioning and annotation ypos.sum.pos <- c() ypos.sum.neg <- c() ypos.sum.dk <- c() # iterate all frequencies of the items. we have the simple # data rows in this data frame and now need to "split" # positive and negative values for (i in seq_len(ncol(freq.df))) { # sort fr <- freq.df[, sort.freq[i]] # positive values. we need an x-position for each item, # a group indicator, the frequencies (as percent value), # and the y position for labels. mydat.pos <- as.data.frame( rbind(mydat.pos, cbind(x = i, grp = lower.half, frq = fr[lower.half], ypos = cumsum(fr[lower.half]) - 0.5 * (fr[lower.half]), ypos2 = sum(fr[lower.half]) ))) # summed y-position for plotting the summed up frequency labels ypos.sum.pos <- c(ypos.sum.pos, sum(fr[lower.half])) # same as above for negative values mydat.neg <- as.data.frame( rbind(mydat.neg, cbind(x = i, grp = upper.half, frq = -fr[upper.half], ypos = -1 * (cumsum(fr[upper.half]) - 0.5 * (fr[upper.half])), ypos2 = -1 * sum(fr[upper.half]) ))) # summed up (cumulative) percs ypos.sum.neg <- c(ypos.sum.neg, -1 * sum(fr[upper.half])) # same as above for neutral category, if we have any if (!is.null(cat.neutral)) { mydat.dk <- as.data.frame( rbind(mydat.dk, cbind(x = i, grp = catcount + adding, frq = -1 + fr[catcount + adding], ypos = -1 + (fr[catcount + adding] / 2), ypos2 = -1 + fr[catcount + adding], offset = -1 * grid.range[1]) )) # cumulative neutral cat ypos.sum.dk <- c(ypos.sum.dk, -1 + fr[catcount + adding]) } } # x-positions for cumulative percentages xpos.sum.dk <- xpos.sum.neg <- xpos.sum.pos <- seq_len(length(ypos.sum.pos)) # grp as factor mydat.pos$grp <- as.factor(mydat.pos$grp) mydat.neg$grp <- as.factor(mydat.neg$grp) # same for neutral if (!is.null(cat.neutral)) { mydat.dk$grp <- as.factor("neutral") mydat.dk$geom.size <- geom.size mydat.dk$digits <- digits } # label digits needed mydat.neg$digits <- digits mydat.pos$digits <- digits # Prepare and trim legend labels to appropriate size legend.labels <- sjmisc::word_wrap(legend.labels, wrap.legend.labels) if (!is.null(legend.title)) { legend.title <- sjmisc::word_wrap(legend.title, wrap.legend.title) } if (!is.null(title)) { if (!is.null(title.wtd.suffix)) { title <- paste(title, title.wtd.suffix, sep = "") } title <- sjmisc::word_wrap(title, wrap.title) } # check length of x-axis-labels and split longer strings at into new lines # every 10 chars, so labels don't overlap axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) axis.labels <- axis.labels[sort.freq] # set diagram margins if (expand.grid) { expgrid <- waiver() } else { expgrid <- c(0, 0) } # Set up grid breaks. Calculate grid breaks starting at the center (0). Negative sequence is reversed. Positive sequence is skipping the 0 to avoid doubeling. gridbreaks <- round(c(rev(seq(0, -grid.range[1], by = -grid.breaks)), seq(grid.breaks, grid.range[2], by = grid.breaks)), 2) gridlabs <- ifelse(abs(gridbreaks) > 1, "", paste0(abs(round(100 * gridbreaks)), "%")) # start plot here gp <- ggplot() + # positive value bars geom_col( data = mydat.pos, aes_string(x = "x", y = "frq", fill = "grp"), width = geom.size ) + # negative value bars geom_col( data = mydat.neg, aes_string(x = "x", y = "frq", fill = "grp"), width = geom.size, position = position_stack(reverse = T) ) # print bar for neutral category. this is a "fake" bar created # with geom_rect. to make this work, we need to set the x-axis # to a continuous scale... 
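  # (the rectangle starts at the left axis limit, ymin = offset = -grid.range[1], and its
  # length equals the proportion of neutral answers: ymax = frq + offset + 1, where frq
  # was stored as proportion - 1)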
if (!is.null(cat.neutral)) { gp <- gp + geom_rect( data = mydat.dk, aes( xmin = .data$x - (geom.size / 2), xmax = .data$x + (geom.size / 2), ymin = .data$offset, ymax = .data$frq + (.data$offset + 1), fill = "neutral") ) } # if we have neutral colors, we need to add the geom-color # to the color values. if (!is.null(cat.neutral)) geom.colors <- c(geom.colors, cat.neutral.color) # should percentage value labels be printed? percsign <- mydat.pos$percsign <- mydat.neg$percsign <- ifelse(isTRUE(show.prc.sign), "%", "") if (nrow(mydat.dk) > 0) mydat.dk$percsign <- percsign # creating value labels for cumulative percentages, so # zero-percentages are not printed ypos.sum.pos.lab <- ifelse(ypos.sum.pos > 0, sprintf("%.*f%s", digits, 100 * ypos.sum.pos, percsign), "") ypos.sum.neg.lab <- ifelse(ypos.sum.neg < 0, sprintf("%.*f%s", digits, 100 * abs(ypos.sum.neg), percsign), "") ypos.sum.dk.lab <- ifelse(ypos.sum.dk > -1, sprintf("%.*f%s", digits, 100 * (1 + ypos.sum.dk), percsign), "") if (values == "show") { if (!requireNamespace("ggrepel", quietly = TRUE)) { stop("Package `ggrepel` needed to plot labels. Please install it.", call. = FALSE) } # show them in middle of bar gp <- gp + ggrepel::geom_text_repel( data = dplyr::filter(mydat.pos, .data$frq > 0), aes( x = .data$x, y = .data$frq, label = sprintf("%.*f%s", digits, 100 * .data$frq, percsign) ), direction = "y", position = position_stack(vjust = 0.5, reverse = TRUE), force = .5, point.padding = NA ) + ggrepel::geom_text_repel( data = dplyr::filter(mydat.neg, .data$frq < 0), aes( x = .data$x, y = .data$frq, label = sprintf("%.*f%s", digits, 100 * abs(.data$frq), percsign) ), direction = "y", position = position_stack(vjust = 0.5, reverse = TRUE), force = .5, point.padding = NA ) if (!is.null(cat.neutral)) { gp <- gp + geom_text( data = dplyr::filter(mydat.dk, .data$frq > -1), aes( x = .data$x, y = .data$ypos + .data$offset + 1, label = sprintf("%.*f%s", digits, 100 * (1 + .data$frq), percsign) ) ) } } else if (values == "sum.inside" || values == "sum.outside") { # choose label offsets for summed proportions move_pos_labels_left = dplyr::case_when( values == "sum.outside" & !reverse.scale ~ T, values == "sum.inside" & !reverse.scale ~ F, values == "sum.outside" & reverse.scale ~ F, values == "sum.inside" & reverse.scale ~ T ) # show cumulative outside bar if (move_pos_labels_left) { hort.pos <- -0.15 hort.neg <- 1.15 hort.dk <- -0.15 # show cumulative inside bar } else { hort.pos <- 1.15 hort.neg <- -0.15 hort.dk <- 1.15 } gp <- gp + annotate("text", x = xpos.sum.pos, y = ypos.sum.pos, hjust = hort.pos, label = ypos.sum.pos.lab) + annotate("text", x = xpos.sum.neg, y = ypos.sum.neg, hjust = hort.neg, label = ypos.sum.neg.lab) if (!is.null(cat.neutral)) { gp <- gp + annotate("text", x = xpos.sum.dk, y = ypos.sum.dk + 1 - grid.range[1], hjust = hort.dk, label = ypos.sum.dk.lab) } } # continues with plot gp <- gp + labs(title = title, x = axisTitle.x, y = axisTitle.y, fill = legend.title) + # scale x is continuous to make plotting the bar annotation # for neutral category work... 
scale_x_continuous(breaks = seq_len(ncol(freq.df)), labels = axis.labels) + geom_hline(yintercept = 0, color = intercept.line.color) # check wether percentage scale (y-axis) should be reversed if (!reverse.scale) { gp <- gp + scale_y_continuous(breaks = gridbreaks, limits = c(-grid.range[1], grid.range[2]), expand = expgrid, labels = gridlabs) } else { gp <- gp + scale_y_reverse(breaks = gridbreaks, limits = c(grid.range[2], -grid.range[1]), expand = expgrid, labels = gridlabs) } # check whether coordinates should be flipped, i.e. # swap x and y axis if (coord.flip) gp <- gp + coord_flip() # set geom colors sj.setGeomColors( gp, geom.colors, (catcount + adding), show.legend, legend.labels, reverse.colors ) } # is factor with char levels? #' @importFrom sjmisc is_num_fac is_labelled_factor <- function(x) is.factor(x) && !sjmisc::is_num_fac(x) sjPlot/R/plot_kfold_cv.R0000644000176200001440000001534314136600637014712 0ustar liggesusers#' @title Plot model fit from k-fold cross-validation #' @name plot_kfold_cv #' #' @description This function plots the aggregated residuals of k-fold cross-validated #' models against the outcome. This allows to evaluate how the model performs #' according over- or underestimation of the outcome. #' #' @param data A data frame, used to split the data into \code{k} training-test-pairs. #' @param formula A model formula, used to fit linear models (\code{\link[stats]{lm}}) #' over all \code{k} training data sets. Use \code{fit} to specify a #' fitted model (also other models than linear models), which will be used #' to compute cross validation. If \code{fit} is not missing, \code{formula} #' will be ignored. #' @param k Number of folds. #' @param fit Model object, which will be used to compute cross validation. If #' \code{fit} is not missing, \code{formula} will be ignored. Currently, #' only linear, poisson and negative binomial regression models are supported. #' #' @details This function, first, generates \code{k} cross-validated test-training #' pairs and #' fits the same model, specified in the \code{formula}- or \code{fit}- #' argument, over all training data sets. \cr \cr #' Then, the test data is used to predict the outcome from all #' models that have been fit on the training data, and the residuals #' from all test data is plotted against the observed values (outcome) #' from the test data (note: for poisson or negative binomial models, the #' deviance residuals are calculated). This plot can be used to validate the model #' and see, whether it over- (residuals > 0) or underestimates #' (residuals < 0) the model's outcome. #' #' @note Currently, only linear, poisson and negative binomial regression models are supported. #' #' @examples #' data(efc) #' #' plot_kfold_cv(efc, neg_c_7 ~ e42dep + c172code + c12hour) #' plot_kfold_cv(mtcars, mpg ~.) #' #' # for poisson models. 
need to fit a model and use 'fit'-argument #' fit <- glm(tot_sc_e ~ neg_c_7 + c172code, data = efc, family = poisson) #' plot_kfold_cv(efc, fit = fit) #' #' # and for negative binomial models #' fit <- MASS::glm.nb(tot_sc_e ~ neg_c_7 + c172code, data = efc) #' plot_kfold_cv(efc, fit = fit) #' #' @import ggplot2 #' @importFrom datawizard data_partition #' @importFrom dplyr mutate ungroup summarise #' @importFrom purrr map map2 #' @importFrom tidyr unnest #' @importFrom graphics plot #' @importFrom stats as.formula formula family poisson glm lm predict #' @importFrom purrr map #' @importFrom MASS glm.nb #' @export plot_kfold_cv <- function(data, formula, k = 5, fit) { # make sure that data is a data frame if (!is.data.frame(data)) data <- as.data.frame(data) # check if a formula was passed as argument... if (!missing(formula)) { # make sure we have a formula if (!inherits(formula, "formula")) formula <- stats::as.formula(formula) # reset fam fam <- NULL } else if (!missing(fit)) { # ... or a fitted model formula <- stats::formula(fit) # get model family for glm if (inherits(fit, "glm")) fam <- stats::family(fit) else fam <- NULL } else { stop("Either `formula` or `fit` must be supplied.", call. = F) } # get name of response variable and get variable label, if # there is any... used for labelling plot axis resp <- formula[[2]] resp.name <- sjlabelled::get_label(data[[deparse(resp)]], def.value = deparse(resp)) # check if fit parameter was specified, and we have a model family if (!is.null(fam)) { # for poisson models, show deviance residuals if (fam$family == "poisson") { # create cross-validated test-training pairs, run poisson-model on each # pair, get deviance residuals and response value kfolds <- do.call(rbind, lapply(1:k, function(i) { out <- datawizard::data_partition(data, training_proportion = .8) data.frame(train = I(list(out$training)), test = I(list(out$test))) })) res <- kfolds %>% dplyr::mutate(model = purrr::map(.data$train, ~ stats::glm(formula, data = .x, family = stats::poisson(link = "log")))) %>% dplyr::mutate(residuals = purrr::map(.data$model, ~ stats::residuals(.x, "deviance"))) %>% dplyr::mutate(.response = purrr::map(.data$model, ~ insight::get_response(.x))) # for negative binomial models, show deviance residuals } else if (inherits(fit, "negbin")) { # create cross-validated test-training pairs, run poisson-model on each # pair, get deviance residuals and response value kfolds <- do.call(rbind, lapply(1:k, function(i) { out <- datawizard::data_partition(data, training_proportion = .8) data.frame(train = I(list(out$training)), test = I(list(out$test))) })) res <- kfolds %>% dplyr::mutate(model = purrr::map(.data$train, ~ MASS::glm.nb(formula, data = .))) %>% dplyr::mutate(residuals = purrr::map(.data$model, ~ stats::residuals(.x, "deviance"))) %>% dplyr::mutate(.response = purrr::map(.data$model, ~ insight::get_response(.x))) } # unnest residuals and response values res <- suppressWarnings(res %>% tidyr::unnest(residuals, .data$.response)) } else { # create cross-validated test-training pairs, run linear model on each # pair, get predicted values and quality measures for models fitted on the # train data kfolds <- do.call(rbind, lapply(1:k, function(i) { out <- datawizard::data_partition(data, training_proportion = .8) data.frame(train = I(list(out$training)), test = I(list(out$test))) })) res <- kfolds %>% dplyr::mutate(model = purrr::map(.data$train, ~ stats::lm(formula, data = .))) %>% dplyr::mutate(predicted = purrr::map2(.data$model, .data$test, function(.x, .y) 
{ out <- data.frame(.fitted = stats::predict(.x, newdata = .y)) cbind(.y, out) })) %>% tidyr::unnest(cols = .data$predicted) # make sure that response vector has an identifiably name colnames(res)[which(colnames(res) == deparse(resp))] <- ".response" # compute residuals for each k-fold model res <- res %>% dplyr::mutate(residuals = .data$.response - .data$.fitted) } # plot response against residuals, to see where our model over- or # underestimates the outcome p <- ggplot(data = res, aes_string(x = ".response", y = "residuals")) + geom_hline(yintercept = 0) + geom_point() + stat_smooth(method = "loess") + theme_minimal() + labs(y = "Residuals", x = resp.name) # plot it p } sjPlot/R/sjPlotDist.R0000644000176200001440000004725413543605175014175 0ustar liggesusers#' @title Plot normal distributions #' @name dist_norm #' #' @description This function plots a simple normal distribution or a normal distribution #' with shaded areas that indicate at which value a significant p-level #' is reached. #' #' @param norm Numeric, optional. If specified, a normal distribution with \code{mean} and \code{sd} #' is plotted and a shaded area at \code{norm} value position is plotted that #' indicates whether or not the specified value is significant or not. #' If both \code{norm} and \code{p} are not specified, a distribution without shaded #' area is plotted. #' @param mean Numeric. Mean value for normal distribution. By default 0. #' @param sd Numeric. Standard deviation for normal distribution. By default 1. #' @param p Numeric, optional. If specified, a normal distribution with \code{mean} and \code{sd} #' is plotted and a shaded area at the position where the specified p-level #' starts is plotted. If both \code{norm} and \code{p} are not specified, a distribution #' without shaded area is plotted. #' @param xmax Numeric, optional. Specifies the maximum x-axis-value. If not specified, the x-axis #' ranges to a value where a p-level of 0.00001 is reached. #' @param geom.alpha Specifies the alpha-level of the shaded area. Default is 0.7, range between 0 to 1. #' #' @inheritParams plot_grpfrq #' #' @examples #' # a simple normal distribution #' dist_norm() #' #' # a simple normal distribution with different mean and sd. #' # note that curve looks similar to above plot, but axis range #' # has changed. #' dist_norm(mean = 2, sd = 4) #' #' # a simple normal distribution #' dist_norm(norm = 1) #' #' # a simple normal distribution #' dist_norm(p = 0.2) #' #' @import ggplot2 #' @importFrom stats qchisq pchisq dchisq qf pf df qnorm pnorm dnorm qt pt dt #' @export dist_norm <- function(norm = NULL, mean = 0, sd = 1, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7) { # -------------------------------------- # determine maximum range of x-axis. # -------------------------------------- if (is.null(xmax)) { if (is.null(norm)) { n.max <- stats::qnorm(0.00001, mean, sd, lower.tail = F) } # -------------------------------------- # else, if we have a x-value, take into # account all possible x-valuess that would lead # to a theoretical p-value of 0.00001. 
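  # e.g. norm = 3 with mean = 0 and sd = 1: pnorm(3, lower.tail = FALSE) is roughly 0.0013,
  # so n.max is increased stepwise to 5, where the upper-tail probability drops below 0.00001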
# -------------------------------------- else { n.max <- norm while (stats::pnorm(n.max, mean, sd, lower.tail = F) > 0.00001) { n.max <- n.max + 1 } } } else { n.max <- xmax } # -------------------------------------- # create data frame # -------------------------------------- mydat <- data.frame(x = seq(-n.max, n.max, length.out = 20 * n.max)) # density normal distribution mydat$y <- stats::dnorm(mydat$x, mean, sd) # base plot with normal-distribution gp <- ggplot(mydat, aes_string(x = "x", y = "y")) + geom_line() sub.df <- NULL if (!is.null(p)) { # plot area for indicated x-value... sub.df <- mydat[mydat$x > stats::qnorm(p, mean, sd, lower.tail = F), ] } else if (!is.null(norm)) { # resp. for p-value... sub.df <- mydat[mydat$x > norm, ] } if (!is.null(sub.df)) { sub.df$p.level <- ifelse(sub.df$x > stats::qnorm(0.05, mean, sd, lower.tail = F), "sig", "non-sig") cs <- stats::qnorm(0.05, mean, sd, lower.tail = F) gp <- gp + geom_ribbon(data = sub.df, aes_string(ymax = "y", fill = "p.level"), ymin = 0, alpha = geom.alpha) + annotate("text", label = sprintf("x = %.2f", cs), x = cs, y = 0, vjust = 1.3) # add limit of p-value if (!is.null(norm)) { pv <- stats::pnorm(norm, mean, sd, lower.tail = F) if (pv >= 0.05) { gp <- gp + annotate("text", label = sprintf("p = %.2f", pv), x = norm, y = 0, hjust = -0.1, vjust = -0.5, angle = 90) } } } gp <- sj.setGeomColors(gp, geom.colors, pal.len = 2, labels = c("p > 5%", "p < 0.05")) gp <- gp + ylab(NULL) + xlab(NULL) print(gp) } #' @title Plot chi-squared distributions #' @name dist_chisq #' #' @description This function plots a simple chi-squared distribution or a chi-squared distribution #' with shaded areas that indicate at which chi-squared value a significant p-level #' is reached. #' #' @param chi2 Numeric, optional. If specified, a chi-squared distribution with \code{deg.f} degrees #' of freedom is plotted and a shaded area at \code{chi2} value position is plotted that #' indicates whether or not the specified value is significant or not. #' If both \code{chi2} and \code{p} are not specified, a distribution without shaded #' area is plotted. #' @param deg.f Numeric. The degrees of freedom for the chi-squared distribution. Needs to #' be specified. #' @param p Numeric, optional. If specified, a chi-squared distribution with \code{deg.f} degrees #' of freedom is plotted and a shaded area at the position where the specified p-level #' starts is plotted. If both \code{chi2} and \code{p} are not specified, a distribution #' without shaded area is plotted. #' @param xmax Numeric, optional. Specifies the maximum x-axis-value. If not specified, the x-axis #' ranges to a value where a p-level of 0.00001 is reached. #' #' @inheritParams dist_norm #' @inheritParams plot_grpfrq #' #' @examples #' # a simple chi-squared distribution #' # for 6 degrees of freedom #' dist_chisq(deg.f = 6) #' #' # a chi-squared distribution for 6 degrees of freedom, #' # and a shaded area starting at chi-squared value of ten. #' # With a df of 6, a chi-squared value of 12.59 would be "significant", #' # thus the shaded area from 10 to 12.58 is filled as "non-significant", #' # while the area starting from chi-squared value 12.59 is filled as #' # "significant" #' dist_chisq(chi2 = 10, deg.f = 6) #' #' # a chi-squared distribution for 6 degrees of freedom, #' # and a shaded area starting at that chi-squared value, which has #' # a p-level of about 0.125 (which equals a chi-squared value of about 10). 
#' # With a df of 6, a chi-squared value of 12.59 would be "significant", #' # thus the shaded area from 10 to 12.58 (p-level 0.125 to p-level 0.05) #' # is filled as "non-significant", while the area starting from chi-squared #' # value 12.59 (p-level < 0.05) is filled as "significant". #' dist_chisq(p = 0.125, deg.f = 6) #' #' @import ggplot2 #' @export dist_chisq <- function(chi2 = NULL, deg.f = NULL, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7) { # -------------------------------------- # check parameters # -------------------------------------- if (is.null(deg.f)) { warning("Degrees of freedom ('deg.f') needs to be specified.", call. = F) return(invisible(NULL)) } # -------------------------------------- # determine maximum range of x-axis. if we have # p-value but no chi2-value, distribution should range until # a theoretical p-value of 0.00001 is reached. this should # cover all possible (and visible) chi2-values # -------------------------------------- if (is.null(xmax)) { if (is.null(chi2)) { chisq.max <- stats::qchisq(0.00001, deg.f, lower.tail = F) } # -------------------------------------- # else, if we have a chi2-value, take into # account all possible chi2-values that would lead # to a theoretical p-value of 0.00001. # -------------------------------------- else { chisq.max <- chi2 while (stats::pchisq(chisq.max, deg.f, lower.tail = F) > 0.00001) { chisq.max <- chisq.max + 1 } } } else { chisq.max <- xmax } # -------------------------------------- # create data frame # -------------------------------------- mydat <- data.frame(x = seq(0, chisq.max, length.out = 10 * chisq.max)) # density distribution of chi2 mydat$y <- stats::dchisq(mydat$x, deg.f) # base plot with chi2-distribution gp <- ggplot(mydat, aes_string(x = "x", y = "y")) + geom_line() sub.df <- NULL if (!is.null(p)) { # plot area for indicated chi2-value... sub.df <- mydat[mydat$x > stats::qchisq(p, deg.f, lower.tail = F), ] } else if (!is.null(chi2)) { # resp. for p-value... sub.df <- mydat[mydat$x > chi2, ] } if (!is.null(sub.df)) { sub.df$p.level <- ifelse(sub.df$x > stats::qchisq(0.05, deg.f, lower.tail = F), "sig", "non-sig") cs <- stats::qchisq(0.05, deg.f, lower.tail = F) gp <- gp + geom_ribbon(data = sub.df, aes_string(ymax = "y", fill = "p.level"), ymin = 0, alpha = geom.alpha) + annotate("text", label = as.character(as.expression(substitute(chi^2 == c2, list(c2 = sprintf("%.2f", cs))))), parse = TRUE, x = cs, y = 0, vjust = 1.2) # add limit of p-value if (!is.null(chi2)) { pv <- stats::pchisq(chi2, deg.f, lower.tail = F) if (pv >= 0.05) { gp <- gp + annotate("text", label = sprintf("p = %.2f", pv), x = chi2, y = 0, hjust = -0.1, vjust = -0.5, angle = 90) } } } gp <- sj.setGeomColors(gp, geom.colors, pal.len = 2, labels = c("p > 5%", "p < 0.05")) gp <- gp + ylab(NULL) + xlab("chi-squared value") print(gp) } #' @title Plot F distributions #' @name dist_f #' #' @description This function plots a simple F distribution or an F distribution #' with shaded areas that indicate at which F value a significant p-level #' is reached. #' #' @param f Numeric, optional. If specified, an F distribution with \code{deg.f1} and \code{deg.f2} degrees #' of freedom is plotted and a shaded area at \code{f} value position is plotted that #' indicates whether or not the specified value is significant or not. #' If both \code{f} and \code{p} are not specified, a distribution without shaded #' area is plotted. #' @param deg.f1 Numeric. The first degrees of freedom for the F distribution. Needs to #' be specified. 
#' @param deg.f2 Numeric. The second degrees of freedom for the F distribution. Needs to #' be specified. #' @param p Numeric, optional. If specified, a F distribution with \code{deg.f1} and \code{deg.f2} degrees #' of freedom is plotted and a shaded area at the position where the specified p-level #' starts is plotted. If both \code{f} and \code{p} are not specified, a distribution #' without shaded area is plotted. #' @param xmax Numeric, optional. Specifies the maximum x-axis-value. If not specified, the x-axis #' ranges to a value where a p-level of 0.00001 is reached. #' #' @inheritParams dist_norm #' @inheritParams plot_grpfrq #' #' @examples #' # a simple F distribution for 6 and 45 degrees of freedom #' dist_f(deg.f1 = 6, deg.f2 = 45) #' #' # F distribution for 6 and 45 degrees of freedom, #' # and a shaded area starting at F value of two. #' # F-values equal or greater than 2.31 are "significant" #' dist_f(f = 2, deg.f1 = 6, deg.f2 = 45) #' #' # F distribution for 6 and 45 degrees of freedom, #' # and a shaded area starting at a p-level of 0.2 #' # (F-Value about 1.5). #' dist_f(p = 0.2, deg.f1 = 6, deg.f2 = 45) #' #' @import ggplot2 #' @export dist_f <- function(f = NULL, deg.f1 = NULL, deg.f2 = NULL, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7) { # -------------------------------------- # check parameters # -------------------------------------- if (is.null(deg.f1) || is.null(deg.f2)) { warning("Both degrees of freedom ('deg.f1' and 'deg.f2') needs to be specified.", call. = F) return(invisible(NULL)) } # -------------------------------------- # determine maximum range of x-axis. if we have # p-value but no f-value, distribution should range until # a theoretical p-value of 0.00001 is reached. this should # cover all possible (and visible) f-values # -------------------------------------- if (is.null(xmax)) { if (is.null(f)) { f.max <- stats::qf(0.00001, deg.f1, deg.f2, lower.tail = F) # -------------------------------------- # else, if we have a f-value, take into # account all possible f-values that would lead # to a theoretical p-value of 0.00001. # -------------------------------------- } else { f.max <- f while (stats::pf(f.max, deg.f1, deg.f2, lower.tail = F) > 0.00001) f.max <- f.max + 1 } } else { f.max <- xmax } # -------------------------------------- # create data frame # -------------------------------------- mydat <- data.frame(x = seq(0, f.max, length.out = 30 * f.max)) # density distribution of f mydat$y <- stats::df(mydat$x, deg.f1, deg.f2) # base plot with f-distribution gp <- ggplot(mydat, aes_string(x = "x", y = "y")) + geom_line() sub.df <- NULL if (!is.null(p)) { # plot area for indicated f-value... sub.df <- mydat[mydat$x > stats::qf(p, deg.f1, deg.f2, lower.tail = F), ] } else if (!is.null(f)) { # resp. for p-value... 
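    # shade everything to the right of the supplied F-value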
sub.df <- mydat[mydat$x > f, ] } if (!is.null(sub.df)) { sub.df$p.level <- ifelse(sub.df$x > stats::qf(0.05, deg.f1, deg.f2, lower.tail = F), "sig", "non-sig") fv <- stats::qf(0.05, deg.f1, deg.f2, lower.tail = F) gp <- gp + geom_ribbon(data = sub.df, aes_string(ymax = "y", fill = "p.level"), ymin = 0, alpha = geom.alpha) + annotate("text", label = sprintf("F = %.2f", fv), x = fv, y = 0, vjust = 1.3) # add limit of p-value if (!is.null(f)) { pv <- stats::pf(f, deg.f1, deg.f2, lower.tail = F) if (pv >= 0.05) { gp <- gp + annotate("text", label = sprintf("p = %.2f", pv), x = f, y = 0, hjust = -0.1, vjust = -0.5, angle = 90) } } } gp <- sj.setGeomColors(gp, geom.colors, pal.len = 2, labels = c("p > 5%", "p < 0.05")) gp <- gp + ylab(NULL) + xlab("F-value") print(gp) } #' @title Plot t-distributions #' @name dist_t #' #' @description This function plots a simple t-distribution or a t-distribution #' with shaded areas that indicate at which t-value a significant p-level #' is reached. #' #' @param t Numeric, optional. If specified, a t-distribution with \code{deg.f} degrees #' of freedom is plotted and a shaded area at \code{t} value position is plotted that #' indicates whether or not the specified value is significant or not. #' If both \code{t} and \code{p} are not specified, a distribution without shaded #' area is plotted. #' @param deg.f Numeric. The degrees of freedom for the t-distribution. Needs to #' be specified. #' @param p Numeric, optional. If specified, a t-distribution with \code{deg.f} degrees #' of freedom is plotted and a shaded area at the position where the specified p-level #' starts is plotted. If both \code{t} and \code{p} are not specified, a distribution #' without shaded area is plotted. #' @param xmax Numeric, optional. Specifies the maximum x-axis-value. If not specified, the x-axis #' ranges to a value where a p-level of 0.00001 is reached. #' #' @inheritParams dist_norm #' @inheritParams plot_grpfrq #' #' @examples #' # a simple t-distribution #' # for 6 degrees of freedom #' dist_t(deg.f = 6) #' #' # a t-distribution for 6 degrees of freedom, #' # and a shaded area starting at t-value of one. #' # With a df of 6, a t-value of 1.94 would be "significant". #' dist_t(t = 1, deg.f = 6) #' #' # a t-distribution for 6 degrees of freedom, #' # and a shaded area starting at p-level of 0.4 #' # (t-value of about 0.26). #' dist_t(p = 0.4, deg.f = 6) #' #' @import ggplot2 #' @export dist_t <- function(t = NULL, deg.f = NULL, p = NULL, xmax = NULL, geom.colors = NULL, geom.alpha = 0.7) { # -------------------------------------- # check parameters # -------------------------------------- if (is.null(deg.f)) { warning("Degrees of freedom ('deg.f') needs to be specified.", call. = F) return(invisible(NULL)) } # -------------------------------------- # determine maximum range of x-axis. if we have # p-value but no t-value, distribution should range until # a theoretical p-value of 0.00001 is reached. this should # cover all possible (and visible) t-values # -------------------------------------- if (is.null(xmax)) { if (is.null(t)) { t.max <- stats::qt(0.00001, deg.f, lower.tail = F) } # -------------------------------------- # else, if we have a t-value, take into # account all possible t-values that would lead # to a theoretical p-value of 0.00001. 
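  # e.g. with t = 2 and deg.f = 6, t.max is increased in steps of 1 until
  # pt(t.max, deg.f, lower.tail = FALSE) drops below 0.00001, so the x-axis
  # covers the whole visible tail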
# -------------------------------------- else { t.max <- t while (stats::pt(t.max, deg.f, lower.tail = F) > 0.00001) { t.max <- t.max + 1 } } } else { t.max <- xmax } # -------------------------------------- # create data frame # -------------------------------------- mydat <- data.frame(x = seq(-t.max, t.max, length.out = 20 * t.max)) # density distribution of t mydat$y <- stats::dt(mydat$x, deg.f) # base plot with t-distribution gp <- ggplot(mydat, aes_string(x = "x", y = "y")) + geom_line() sub.df <- NULL if (!is.null(p)) { # plot area for indicated t-value... sub.df <- mydat[mydat$x > stats::qt(p, deg.f, lower.tail = F), ] } else if (!is.null(t)) { # resp. for p-value... sub.df <- mydat[mydat$x > t, ] } if (!is.null(sub.df)) { sub.df$p.level <- ifelse(sub.df$x > stats::qt(0.05, deg.f, lower.tail = F), "sig", "non-sig") tv <- stats::qt(0.05, deg.f, lower.tail = F) gp <- gp + geom_ribbon(data = sub.df, aes_string(ymax = "y", fill = "p.level"), ymin = 0, alpha = geom.alpha) + annotate("text", label = sprintf("t = %.2f", tv), x = tv, y = 0, vjust = 1.3) # add limit of p-value if (!is.null(t)) { pv <- stats::pt(t, deg.f, lower.tail = F) if (pv >= 0.05) { gp <- gp + annotate("text", label = sprintf("p = %.2f", pv), x = t, y = 0, hjust = -0.1, vjust = -0.5, angle = 90) } } } gp <- sj.setGeomColors(gp, geom.colors, pal.len = 2, labels = c("p > 5%", "p < 0.05")) gp <- gp + ylab(NULL) + xlab("t-value") print(gp) } sjPlot/R/color_utils.R0000644000176200001440000000425613446531454014427 0ustar liggesusers#' @importFrom scales brewer_pal grey_pal col_check2 <- function(geom.colors, collen) { # -------------------------------------------- # check color argument # -------------------------------------------- # check for corrct color argument if (!is.null(geom.colors)) { # check for color brewer palette if (is.brewer.pal(geom.colors[1])) { geom.colors <- scales::brewer_pal(palette = geom.colors[1])(collen) } else if (is.sjplot.pal(geom.colors[1])) { geom.colors <- get_sjplot_colorpalette(geom.colors[1], collen) # do we have correct amount of colours? } else if (geom.colors[1] == "gs") { geom.colors <- scales::grey_pal()(collen) # do we have correct amount of colours? } else if (geom.colors[1] == "bw") { geom.colors <- rep("black", times = collen) # do we have correct amount of colours? } else if (length(geom.colors) > collen) { # shorten palette geom.colors <- geom.colors[1:collen] } else if (length(geom.colors) < collen) { # repeat color palette geom.colors <- rep(geom.colors, times = collen) # shorten to required length geom.colors <- geom.colors[1:collen] } } else { geom.colors <- scales::brewer_pal(palette = "Set1")(collen) } geom.colors } # check whether a color value is indicating # a color brewer palette is.brewer.pal <- function(pal) { bp.seq <- c("BuGn", "BuPu", "GnBu", "OrRd", "PuBu", "PuBuGn", "PuRd", "RdPu", "YlGn", "YlGnBu", "YlOrBr", "YlOrRd", "Blues", "Greens", "Greys", "Oranges", "Purples", "Reds") bp.div <- c("BrBG", "PiYG", "PRGn", "PuOr", "RdBu", "RdGy", "RdYlBu", "RdYlGn", "Spectral") bp.qul <- c("Accent", "Dark2", "Paired", "Pastel1", "Pastel2", "Set1", "Set2", "Set3") bp <- c(bp.seq, bp.div, bp.qul) pal %in% bp } is.sjplot.pal <- function(pal) { pal %in% names(sjplot_colors) } get_sjplot_colorpalette <- function(pal, len) { col <- sjplot_colors[[pal]] if (len > length(col)) { warning("More colors requested than length of color palette.", call. 
= F) len <- length(col) } col[1:len] } sjPlot/R/tab_fa.R0000644000176200001440000005620014147747720013305 0ustar liggesusers#' @title Summary of factor analysis as HTML table #' @name tab_fa #' #' @description Performs a factor analysis on a data frame or matrix #' and displays the factors as HTML #' table, or saves them as file. \cr \cr In case a data frame is used as #' parameter, the Cronbach's Alpha value for each factor scale will be calculated, #' i.e. all variables with the highest loading for a factor are taken for the #' reliability test. The result is an alpha value for each factor dimension. #' #' @param show.comm Logical, if \code{TRUE}, show the communality column in the table. #' @param method the factoring method to be used. \code{"ml"} will do a maximum likelihood factor analysis (default). #' \code{"minres"} will do a minimum residual (OLS), #' \code{"wls"} will do a weighted least squares (WLS) solution, #' \code{"gls"} does a generalized weighted least squares (GLS), #' \code{"pa"} will do the principal factor solution, #' \code{"minchi"} will minimize the sample size weighted chi square #' when treating pairwise correlations with different number of #' subjects per pair. \code{"minrank"} will do a minimum rank factor analysis. #' @param sort logical, if \code{TRUE}, sort the loadings for each factors #' (items will be sorted in terms of their greatest loading, in descending #' order) #' #' @inheritParams tab_pca #' @inheritParams tab_model #' @inheritParams tab_df #' @inheritParams tab_xtab #' @inheritParams plot_grpfrq #' @inheritParams tab_corr #' #' @return Invisibly returns #' \itemize{ #' \item the web page style sheet (\code{page.style}), #' \item the web page content (\code{page.content}), #' \item the complete html-output (\code{page.complete}), #' \item the html-table with inline-css for use with knitr (\code{knitr}), #' \item the \code{factor.index}, i.e. the column index of each variable with the highest factor loading for each factor and #' \item the \code{removed.items}, i.e. which variables have been removed because they were outside of the \code{fctr.load.tlrn}'s range. #' } #' for further use. #' #' @note This method for factor analysis relies on the functions #' \code{\link[psych]{fa}} and \code{\link[psych]{fa.parallel}} from the psych package. #' #' #' @examples #' \dontrun{ #' # Data from the EUROFAMCARE sample dataset #' library(sjmisc) #' library(GPArotation) #' data(efc) #' #' # recveive first item of COPE-index scale #' start <- which(colnames(efc) == "c82cop1") #' # recveive last item of COPE-index scale #' end <- which(colnames(efc) == "c90cop9") #' # auto-detection of labels #' if (interactive()) { #' tab_fa(efc[, start:end]) #' }} #' @export tab_fa <- function(data, rotation = "promax", method = c("ml", "minres", "wls", "gls", "pa", "minchi", "minrank"), nmbr.fctr = NULL, fctr.load.tlrn = 0.1, sort = FALSE, title = "Factor Analysis", var.labels = NULL, wrap.labels = 40, show.cronb = TRUE, show.comm = FALSE, alternate.rows = FALSE, digits = 2, CSS = NULL, encoding = NULL, file = NULL, use.viewer = TRUE, remove.spaces = TRUE) { # ------------------------------------- # check encoding # ------------------------------------- encoding <- get.encoding(encoding, data) # check arguments method <- match.arg(method) if (!requireNamespace("psych", quietly = TRUE)) { stop("Package 'psych' required for this function to work. Please install it.", call. 
= FALSE) } # -------------------------------------------------------- # try to automatically set labels is not passed as parameter # -------------------------------------------------------- if (is.null(var.labels) && is.data.frame(data)) { var.labels <- sjlabelled::get_label(data, def.value = colnames(data)) } # ---------------------------- # check if user has passed a data frame # or a pca object # ---------------------------- if (inherits(data, "fa")) { if (sort == TRUE) { fadata <- psych::fa.sort(data) #resort loadings } else { fadata <- data } dataframeparam <- FALSE } else if (is.data.frame(data)) { if (is.null(nmbr.fctr)) { nr_factors <- psych::fa.parallel(data, fa = 'fa', fm = method)$nfact dev.off() fadata <- psych::fa(data, nfactors = nr_factors, fm = method, rotate = rotation) if (sort == TRUE) { fadata <- psych::fa.sort(fadata) #resort loadings } } else { fadata <- psych::fa(data, nfactors = nmbr.fctr, fm = method, rotate = rotation) if (sort == TRUE) { fadata <- psych::fa.sort(fadata) #resort loadings } } dataframeparam <- TRUE } # ------------------------------------- # init header # ------------------------------------- toWrite <- sprintf("\n\n\n", encoding) # ------------------------------------- # init style sheet and tags used for css-definitions # we can use these variables for string-replacement # later for return value # ------------------------------------- tag.table <- "table" tag.caption <- "caption" tag.thead <- "thead" tag.tdata <- "tdata" tag.centeralign <- "centeralign" tag.rightalign <- "rightalign" tag.cronbach <- "cronbach" tag.comm <- "comm" tag.rotation <- "rotation" tag.kmo <- "kmo" tag.arc <- "arc" tag.minval <- "minval" tag.removable <- "removable" tag.firsttablerow <- "firsttablerow" tag.firsttablecol <- "firsttablecol" css.table <- "border-collapse:collapse; border:none;" css.caption <- "font-weight: bold; text-align:left;" css.thead <- "border-top:double black; padding:0.2cm;" css.tdata <- "padding:0.2cm;" css.centeralign <- "text-align:center;" css.rightalign <- "text-align:right;" css.cronbach <- "font-style:italic; border-bottom:double;" css.comm <- "font-style:italic; color:#666666;" css.kmo <- "font-style:italic;" css.rotation <- "font-style:italic; font-size:0.9em;" css.minval <- "color:#cccccc;" css.arc <- "background-color:#eaeaea;" css.removable <- "background-color:#eacccc;" css.firsttablerow <- "border-top:1px solid black;" css.firsttablecol <- "" if (!show.comm && show.cronb) css.cronbach <- sprintf("%s border-bottom:double;", css.cronbach) # ------------------------ # check user defined style sheets # ------------------------ if (!is.null(CSS)) { if (!is.null(CSS[['css.table']])) css.table <- ifelse(substring(CSS[['css.table']], 1, 1) == '+', paste0(css.table, substring(CSS[['css.table']], 2)), CSS[['css.table']]) if (!is.null(CSS[['css.thead']])) css.thead <- ifelse(substring(CSS[['css.thead']], 1, 1) == '+', paste0(css.thead, substring(CSS[['css.thead']], 2)), CSS[['css.thead']]) if (!is.null(CSS[['css.tdata']])) css.tdata <- ifelse(substring(CSS[['css.tdata']], 1, 1) == '+', paste0(css.tdata, substring(CSS[['css.tdata']], 2)), CSS[['css.tdata']]) if (!is.null(CSS[['css.caption']])) css.caption <- ifelse(substring(CSS[['css.caption']], 1, 1) == '+', paste0(css.caption, substring(CSS[['css.caption']], 2)), CSS[['css.caption']]) if (!is.null(CSS[['css.centeralign']])) css.centeralign <- ifelse(substring(CSS[['css.centeralign']], 1, 1) == '+', paste0(css.centeralign, substring(CSS[['css.centeralign']], 2)), CSS[['css.centeralign']]) if 
(!is.null(CSS[['css.rightalign']])) css.rightalign <- ifelse(substring(CSS[['css.rightalign']], 1, 1) == '+', paste0(css.rightalign, substring(CSS[['css.rightalign']], 2)), CSS[['css.rightalign']]) if (!is.null(CSS[['css.arc']])) css.arc <- ifelse(substring(CSS[['css.arc']], 1, 1) == '+', paste0(css.arc, substring(CSS[['css.arc']], 2)), CSS[['css.arc']]) if (!is.null(CSS[['css.firsttablerow']])) css.firsttablerow <- ifelse(substring(CSS[['css.firsttablerow']], 1, 1) == '+', paste0(css.firsttablerow, substring(CSS[['css.firsttablerow']], 2)), CSS[['css.firsttablerow']]) if (!is.null(CSS[['css.firsttablecol']])) css.firsttablecol <- ifelse(substring(CSS[['css.firsttablecol']], 1, 1) == '+', paste0(css.firsttablecol, substring(CSS[['css.firsttablecol']], 2)), CSS[['css.firsttablecol']]) if (!is.null(CSS[['css.cronbach']])) css.cronbach <- ifelse(substring(CSS[['css.cronbach']], 1, 1) == '+', paste0(css.cronbach, substring(CSS[['css.cronbach']], 2)), CSS[['css.cronbach']]) if (!is.null(CSS[['css.comm']])) css.comm <- ifelse(substring(CSS[['css.comm']], 1, 1) == '+', paste0(css.comm, substring(CSS[['css.comm']], 2)), CSS[['css.comm']]) if (!is.null(CSS[['css.kmo']])) css.kmo <- ifelse(substring(CSS[['css.kmo']], 1, 1) == '+', paste0(css.kmo, substring(CSS[['css.kmo']], 2)), CSS[['css.kmo']]) if (!is.null(CSS[['css.rotation']])) css.rotation <- ifelse(substring(CSS[['css.rotation']], 1, 1) == '+', paste0(css.rotation, substring(CSS[['css.rotation']], 2)), CSS[['css.rotation']]) if (!is.null(CSS[['css.minval']])) css.minval <- ifelse(substring(CSS[['css.minval']], 1, 1) == '+', paste0(css.minval, substring(CSS[['css.minval']], 2)), CSS[['css.minval']]) if (!is.null(CSS[['css.removable']])) css.removable <- ifelse(substring(CSS[['css.removable']], 1, 1) == '+', paste0(css.removable, substring(CSS[['css.removable']], 2)), CSS[['css.removable']]) } # ------------------------ # set page style # ------------------------ page.style <- sprintf("", tag.table, css.table, tag.caption, css.caption, tag.thead, css.thead, tag.tdata, css.tdata, tag.cronbach, css.cronbach, tag.minval, css.minval, tag.removable, css.removable, tag.firsttablerow, css.firsttablerow, tag.firsttablecol, css.firsttablecol, tag.centeralign, css.centeralign, tag.rightalign, css.rightalign, tag.rotation, css.rotation, tag.comm, css.comm, tag.kmo, css.kmo, tag.arc, css.arc) # ------------------------ # start content # ------------------------ toWrite <- paste0(toWrite, page.style) toWrite = paste(toWrite, "\n\n", "\n") # create data frame with factor loadings loadings <- fadata$loadings[] names <- rownames(fadata$loadings) df <- as.data.frame(loadings, row.names = names) # ---------------------------- # check if user defined labels have been supplied # if not, use variable names from data frame # ---------------------------- if (is.null(var.labels)) var.labels <- row.names(df) # ---------------------------- # Prepare length of labels # ---------------------------- if (!is.null(var.labels)) { # wrap long variable labels var.labels <- sjmisc::word_wrap(var.labels, wrap.labels, "
") # resort labels when sort == TRUE if (sort == TRUE) { var.labels <- var.labels[fadata$order] } } # -------------------------------------------------------- # this function checks which items have unclear factor loadings, # i.e. which items do not strongly load on a single factor but # may load almost equally on several factors # -------------------------------------------------------- getRemovableItems <- function(dataframe) { # clear vector removers <- c() # iterate each row of the data frame. each row represents # one item with its factor loadings for (i in seq_len(nrow(dataframe))) { # get factor loadings for each item rowval <- as.numeric(abs(df[i, ])) # retrieve highest loading maxload <- max(rowval) # retrieve 2. highest loading max2load <- sort(rowval, TRUE)[2] # check difference between both if (abs(maxload - max2load) < fctr.load.tlrn) { # if difference is below the tolerance, # remeber row-ID so we can remove that items # for further PCA with updated data frame removers <- c(removers, i) } } # return a vector with index numbers indicating which items # have unclear loadings return(removers) } # -------------------------------------------------------- # this function retrieves a list with the column index ("factor" index) # where each case of the data frame has its highedt factor loading. # So we know to which "group" (factor dimension) each case of the # data frame belongs to according to the pca results # -------------------------------------------------------- getItemLoadings <- function(dataframe) { # return a vector with index numbers indicating which items # loads the highest on which factor return(apply(dataframe, 1, function(x) which.max(abs(x)))) } # -------------------------------------------------------- # this function calculates the cronbach's alpha value for # each factor scale, i.e. all variables with the highest loading # for a factor are taken for the reliability test. The result is # an alpha value for each factor dimension # -------------------------------------------------------- getCronbach <- function(dataframe, itemloadings) { # clear vector cbv <- c() # iterate all highest factor loadings of items for (n in seq_len(length(unique(itemloadings)))) { # calculate cronbach's alpha for those cases that all have the # highest loading on the same factor cbv <- c(cbv, performance::cronbachs_alpha(stats::na.omit(dataframe[, which(itemloadings == n)]))) } # cbv now contains the factor numbers and the related alpha values # for each "factor dimension scale" return(cbv) } # ---------------------------------- # Cronbach's Alpha can only be calculated when having a data frame # with each component / variable as column # ---------------------------------- if (dataframeparam) { # get alpha values alphaValues <- getCronbach(data, getItemLoadings(df)) } else { message("Cronbach's Alpha can only be calculated when having a data frame with each component / variable as column.") alphaValues <- NULL show.cronb <- FALSE } # ------------------------------------- # retrieve those items that have unclear factor loadings, i.e. # which almost load equally on several factors. 
The tolerance # that indicates which difference between factor loadings is # considered as "equally" is defined via fctr.load.tlrn # ------------------------------------- removableItems <- getRemovableItems(df) # ------------------------------------- # retrieve kmo and msa for data set # ------------------------------------- #kmo <- NULL # not implemented at the moment #if (show.msa) kmo <- psych::KMO(data) # ------------------------------------- # convert data frame, add label names # ------------------------------------- maxdf <- apply(df, 1, function(x) max(abs(x))) # ------------------------------------- # start table tag # ------------------------------------- page.content <- "\n" # ------------------------------------- # table caption, variable label # ------------------------------------- if (!is.null(title)) page.content <- paste0(page.content, sprintf(" \n", title)) # ------------------------------------- # header row # ------------------------------------- # write tr-tag page.content <- paste0(page.content, " \n") # first column page.content <- paste0(page.content, " \n") # iterate columns for (i in seq_len(ncol(df))) { page.content <- paste0(page.content, sprintf(" \n", i)) } # check if communality column should be shown if (show.comm) page.content <- paste0(page.content, " \n") # close table row page.content <- paste0(page.content, " \n") # ------------------------------------- # data rows # ------------------------------------- # iterate all rows of df for (i in seq_len(nrow(df))) { # start table row rowcss <- "" # check for removable items in first row if (i %in% removableItems && i == 1) rowcss <- " firsttablerow removable" # check for removable items in other rows if (i %in% removableItems && i != 1) rowcss <- " removable" # check for non-removable items in first row if (is.na(match(i, removableItems)) && i == 1) rowcss <- " firsttablerow" # default row string for alternative row colors arcstring <- "" # if we have alternating row colors, set css if (alternate.rows) arcstring <- ifelse(sjmisc::is_even(i), " arc", "") # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") # print first table cell page.content <- paste0(page.content, sprintf(" \n", arcstring, rowcss, var.labels[i])) # iterate all columns for (j in seq_len(ncol(df))) { # start table column colcss <- sprintf(" class=\"tdata centeralign%s%s\"", arcstring, rowcss) if (maxdf[[i]] != max(abs(df[i, j]))) colcss <- sprintf(" class=\"tdata centeralign minval%s%s\"", arcstring, rowcss) page.content <- paste0(page.content, sprintf(" %.*f\n", colcss, digits, df[i, j])) } # check if comm column should be shown if (show.comm) page.content <- paste0(page.content, sprintf(" \n", arcstring, rowcss, digits, fadata$communalities[[i]])) # close row page.content <- paste0(page.content, " \n") } # # # # ------------------------------------- # # Total Communalities # not implemented at the moment # # ------------------------------------- if (show.comm) { # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, sprintf(" \n", ncol(df))) page.content <- paste0(page.content, sprintf(" \n", digits, sum(fadata$communalities))) page.content <- paste0(page.content, " \n") } # ------------------------------------- # cronbach's alpha # ------------------------------------- if (show.cronb && !is.null(alphaValues)) { # write tr-tag with class-attributes page.content <- paste0(page.content, " \n") # first column 
page.content <- paste0(page.content, " \n") # iterate alpha-values for (i in seq_len(length(alphaValues))) { page.content <- paste0(page.content, sprintf(" \n", digits, alphaValues[i])) } # check if comm column should be shown if (show.comm) page.content <- paste0(page.content, " \n") page.content <- paste0(page.content, " \n") } # ------------------------------------- # finish table # ------------------------------------- page.content <- paste(page.content, "\n
%s
 Factor %iCommunality
%s%.*f
Total Communalities%.*f
Cronbach's α%.*f
") # ------------------------------------- # finish html page # ------------------------------------- toWrite <- paste(toWrite, page.content, "\n") toWrite <- paste0(toWrite, "") # ------------------------------------- # create list with factor loadings that indicate # on which column inside the data frame the highest # loading is # ------------------------------------- factorindex <- apply(df, 1, function(x) which.max(abs(x))) # ------------------------------------- # replace class attributes with inline style, # useful for knitr # ------------------------------------- # copy page content # ------------------------------------- knitr <- page.content # ------------------------------------- # set style attributes for main table tags # ------------------------------------- knitr <- gsub("class=", "style=", knitr, fixed = TRUE, useBytes = TRUE) knitr <- gsub(" 1) { # copy all component correlation values to a data frame df.cc <- data.frame(matrix(unlist(df.comcor), nrow = nrow(df), byrow = FALSE)) # give proper columm names colnames(df.cc) <- sprintf("Component %i", seq_len(ncol(df.cc))) # compute correlation table, store html result html2 <- tab_corr( df.cc, na.deletion = "listwise", p.numeric = TRUE, triangle = "lower", string.diag = sprintf("α=%.3f", unlist(cronbach.total)), encoding = encoding ) } } if (!is.null(html2)) { html$knitr <- paste0(html$knitr, "

 

", html2$knitr) html$page.content <- paste0(html$page.content, "

 

", html2$page.content) html$page.style <- paste0(html$page.style, html2$page.style) html$page.complete <- sprintf( "\n\n\n%s\n\n\n%s\n", encoding, html$page.style, html$page.content ) } html$df.list <- df.ia html$index.scores <- df.index.scores html$cronbach.values <- cronbach.total html$ideal.item.diff <- diff.ideal.list sjlabelled::set_label(html$index.scores) <- purrr::map2_chr(mic.total, cronbach.total, ~ sprintf( "Mean icc=%.3f; Cronbach's Alpha=%.3f", .x, .y )) html } #' @rdname tab_itemscale #' @export sjt.itemanalysis <- tab_itemscale sjPlot/R/plot_grid.R0000644000176200001440000000575013612621114014040 0ustar liggesusers#' @title Arrange list of plots as grid #' @name plot_grid #' #' @description Plot multiple ggplot-objects as a grid-arranged single plot. #' #' @param x A list of ggplot-objects. See 'Details'. #' @param margin A numeric vector of length 4, indicating the top, right, bottom #' and left margin for each plot, in centimetres. #' @param tags Add tags to your subfigures. Can be \code{TRUE} (letter tags) #' or character vector containing tags labels. #' #' @return An object of class \code{gtable}. #' #' @details This function takes a \code{list} of ggplot-objects as argument. #' Plotting functions of this package that produce multiple plot #' objects (e.g., when there is an argument \code{facet.grid}) usually #' return multiple plots as list (the return value is named \code{plot.list}). #' To arrange these plots as grid as a single plot, use \code{plot_grid}. #' #' @examples #' if (require("dplyr") && require("gridExtra")) { #' library(ggeffects) #' data(efc) #' #' # fit model #' fit <- glm( #' tot_sc_e ~ c12hour + e17age + e42dep + neg_c_7, #' data = efc, #' family = poisson #' ) #' #' # plot marginal effects for each predictor, each as single plot #' p1 <- ggpredict(fit, "c12hour") %>% #' plot(show.y.title = FALSE, show.title = FALSE) #' p2 <- ggpredict(fit, "e17age") %>% #' plot(show.y.title = FALSE, show.title = FALSE) #' p3 <- ggpredict(fit, "e42dep") %>% #' plot(show.y.title = FALSE, show.title = FALSE) #' p4 <- ggpredict(fit, "neg_c_7") %>% #' plot(show.y.title = FALSE, show.title = FALSE) #' #' # plot grid #' plot_grid(list(p1, p2, p3, p4)) #' #' # plot grid #' plot_grid(list(p1, p2, p3, p4), tags = TRUE) #' } #' @export plot_grid <- function(x, margin = c(1, 1, 1, 1), tags = NULL) { # check package availability ----- if (!requireNamespace("gridExtra", quietly = TRUE)) { stop("Package `gridExtra` needed for this function to work. Please install it.", call. = F) } # if user did not pass plot.list value, but the complete object returned # by sjPlot-functions, get plot-list then if (!inherits(x, "list") || inherits(x, "sjPlot")) x <- x[["plot.list"]] # add margin to each plot, so no axis labels are cropped x <- lapply(x, function(pl) { pl + theme(plot.margin = unit(margin, "cm")) }) tags_labels <- NULL # Add tags if (isTRUE(tags)) { tags_labels = LETTERS } else{ if (length(tags) < length(x)) { warning("Not enough tags labels in list. 
Using letters instead.") tags_labels = LETTERS } else{ tags_labels = tags } } if (!is.null(tags_labels)) { for (i in 1:length(x)) { x[[i]] <- x[[i]] + labs(tag = tags_labels[i]) } } # compute amount of columns and rows ncol <- round(sqrt(length(x))) nrow <- ceiling(length(x) / ncol) f <- eval(bquote(gridExtra::"grid.arrange")) do.call(f, c(x, nrow = nrow, ncol = ncol)) } sjPlot/R/plot_residuals.R0000644000176200001440000001102314104233216015073 0ustar liggesusers#' @title Plot predicted values and their residuals #' @name plot_residuals #' #' @description This function plots observed and predicted values of the response #' of linear (mixed) models for each coefficient and highlights the #' observed values according to their distance (residuals) to the #' predicted values. This allows to investigate how well actual and #' predicted values of the outcome fit across the predictor variables. #' #' @param fit Fitted linear (mixed) regression model (including objects of class #' \code{\link[nlme]{gls}} or \code{plm}). #' @param show.lines Logical, if \code{TRUE}, a line connecting predicted and #' residual values is plotted. Set this argument to \code{FALSE}, if #' plot-building is too time consuming. #' @param show.resid Logical, if \code{TRUE}, residual values are plotted. #' @param show.pred Logical, if \code{TRUE}, predicted values are plotted. #' @param remove.estimates Numeric vector with indices (order equals to row index of \code{coef(fit)}) #' or character vector with coefficient names that indicate which estimates should be removed #' from the table output. The first estimate is the intercept, followed by the model predictors. #' \emph{The intercept cannot be removed from the table output!} \code{remove.estimates = c(2:4)} #' would remove the 2nd to the 4th estimate (1st to 3rd predictor after intercept) from the output. #' \code{remove.estimates = "est_name"} would remove the estimate \emph{est_name}. Default #' is \code{NULL}, i.e. all estimates are printed. #' #' @inheritParams plot_model #' @inheritParams plot_scatter #' @inheritParams plot_grpfrq #' #' @return A ggplot-object. #' #' @note The actual (observed) values have a coloured fill, while the predicted #' values have a solid outline without filling. 
#' #' @examples #' data(efc) #' # fit model #' fit <- lm(neg_c_7 ~ c12hour + e17age + e42dep, data = efc) #' #' # plot residuals for all independent variables #' plot_residuals(fit) #' #' # remove some independent variables from output #' plot_residuals(fit, remove.estimates = c("e17age", "e42dep")) #' #' @importFrom rlang .data #' @importFrom insight get_data #' @export plot_residuals <- function(fit, geom.size = 2, remove.estimates = NULL, show.lines = TRUE, show.resid = TRUE, show.pred = TRUE, show.ci = FALSE) { # show lines only when both residual and predicted # values are plotted - else, lines make no sense if (!show.pred || !show.resid) show.lines <- FALSE # Obtain predicted and residual values mydat <- insight::get_data(fit) # check whether estimates should be removed from plot if (!is.null(remove.estimates)) { keep <- which(!colnames(mydat) %in% remove.estimates) } else { keep <- seq_len(ncol(mydat)) } mydat$predicted <- stats::predict(fit) mydat$residuals <- stats::residuals(fit) # get name of response, used in ggplot-aes rv <- insight::find_response(fit) # remove estimates, if required dummy <- mydat %>% dplyr::select(keep, .data$predicted, .data$residuals) # set default variable labels, used as column names, so labelled # data variable labels appear in facet grid header. sel <- 2:length(keep) var.labels <- sjlabelled::get_label(dummy, def.value = colnames(dummy)[sel])[sel] if (is.null(var.labels) || all(var.labels == "")) var.labels <- colnames(dummy)[sel] colnames(dummy)[sel] <- var.labels # melt data mydat <- suppressWarnings(dummy %>% tidyr::gather(key = "grp", value = "x", -1, -.data$predicted, -.data$residuals)) colnames(mydat)[1] <- ".response" # melt data, build basic plot res.plot <- ggplot(mydat, aes(x = .data$x, y = .data$.response)) + stat_smooth(method = "lm", se = show.ci, colour = "grey70") if (show.lines) res.plot <- res.plot + geom_segment(aes(xend = .data$x, yend = .data$predicted), alpha = .3) if (show.resid) res.plot <- res.plot + geom_point(aes(fill = .data$residuals), size = geom.size, shape = 21, colour = "grey50") if (show.pred) res.plot <- res.plot + geom_point(aes(y = .data$predicted), shape = 1, size = geom.size) # residual plot res.plot <- res.plot + facet_grid(~grp, scales = "free") + scale_fill_gradient2(low = "#003399", mid = "white", high = "#993300") + guides(color = "none", fill = "none") + labs(x = NULL, y = sjlabelled::get_label(mydat[[1]], def.value = rv)) res.plot } sjPlot/R/sjPlotPearsonsChi2Test.R0000644000176200001440000001156614104233216016412 0ustar liggesusers#' @title Plot Pearson's Chi2-Test of multiple contingency tables #' @name sjp.chi2 #' #' @description Plot p-values of Pearson's Chi2-tests for multiple contingency tables as ellipses or tiles. #' Requires a data frame with dichotomous (dummy) variables. #' Calculation of Chi2-matrix taken from #' \href{https://talesofr.wordpress.com/2013/05/05/ridiculously-photogenic-factors-heatmap-with-p-values/}{Tales of R}. #' #' @param df A data frame with (dichotomous) factor variables. #' #' @return A ggplot-object. 
#' #' @inheritParams plot_grpfrq #' #' @examples #' # create data frame with 5 dichotomous (dummy) variables #' mydf <- data.frame(as.factor(sample(1:2, 100, replace=TRUE)), #' as.factor(sample(1:2, 100, replace=TRUE)), #' as.factor(sample(1:2, 100, replace=TRUE)), #' as.factor(sample(1:2, 100, replace=TRUE)), #' as.factor(sample(1:2, 100, replace=TRUE))) #' # create variable labels #' items <- list(c("Item 1", "Item 2", "Item 3", "Item 4", "Item 5")) #' #' # plot Chi2-contingency-table #' sjp.chi2(mydf, axis.labels = items) #' #' @import ggplot2 #' @importFrom grDevices rgb #' @importFrom dplyr bind_rows #' @export sjp.chi2 <- function(df, title = "Pearson's Chi2-Test of Independence", axis.labels = NULL, wrap.title = 50, wrap.labels = 20, show.legend = FALSE, legend.title = NULL) { # -------------------------------------------------------- # try to automatically set labels is not passed as parameter # -------------------------------------------------------- if (is.null(axis.labels)) { axis.labels <- sjlabelled::get_label(df, def.value = colnames(df)) } # ---------------------------------------------------------------- # Calculation of Chi2-matrix taken from following blog-posting: # http://talesofr.wordpress.com/2013/05/05/ridiculously-photogenic-factors-heatmap-with-p-values/ # ---------------------------------------------------------------- combos <- expand.grid(rep(list(seq_len(ncol(df))), 2)) # combinations with repetitions combos <- as.matrix(combos) combos <- t(combos) # transpose matrix # ---------------------------------------------------------------- # when 2 variables are *not* significant, they are independent # ---------------------------------------------------------------- m <- data.frame() for (i in seq_len(ncol(combos))) { test <- chisq.test(df[, combos[1, i]], df[, combos[2, i]]) out <- data.frame(Row = colnames(df)[combos[1, i]], Column = colnames(df)[combos[2, i]], Chi.Square = round(test$statistic, 4), df = test$parameter, p.value = round(test$p.value, 4), stringsAsFactors = FALSE) m <- suppressWarnings(dplyr::bind_rows(m, out)) } # ---------------------------- # check if user defined labels have been supplied # if not, use variable names from data frame # ---------------------------- if (is.null(axis.labels)) axis.labels <- row.names(m) # -------------------------------------------------------- # unlist labels # -------------------------------------------------------- if (!is.null(axis.labels) && is.list(axis.labels)) { axis.labels <- unlistlabels(axis.labels) } # ---------------------------- # Prepare length of title and labels # ---------------------------- # check length of diagram title and split longer string at into new lines if (!is.null(title)) title <- sjmisc::word_wrap(title, wrap.title) # check length of x-axis-labels and split longer strings at into new lines if (!is.null(axis.labels)) axis.labels <- sjmisc::word_wrap(axis.labels, wrap.labels) # -------------------------------------------------------- # start with base plot object here # -------------------------------------------------------- chiPlot <- ggplot(data = m, aes_string(x = "Row", y = "Column", fill = "p.value", label = "p.value")) + geom_tile() + scale_x_discrete(labels = axis.labels) + scale_y_discrete(labels = axis.labels) + scale_fill_gradient2(low = grDevices::rgb(128, 205, 193, maxColorValue = 255), mid = "white", high = grDevices::rgb(5, 113, 176, maxColorValue = 255), midpoint = 0.05) + geom_text(label = sprintf("%.3f", m$p.value)) + labs(title = title, x = NULL, y = NULL, fill = 
legend.title) # --------------------------------------------------------- # hide legend? # --------------------------------------------------------- if (!show.legend) chiPlot <- chiPlot + guides(fill = "none") chiPlot } sjPlot/R/plot_diag_linear.R0000644000176200001440000001643113612122336015351 0ustar liggesusersplot_diag_linear <- function(model, geom.colors, dot.size, line.size, ...) { plot.list <- list() geom.colors <- col_check2(geom.colors, 2) p <- diag_vif(model) if (!is.null(p)) plot.list[[length(plot.list) + 1]] <- p p <- diag_qq(model, geom.colors, dot.size, line.size) if (!is.null(p)) plot.list[[length(plot.list) + 1]] <- p p <- diag_reqq(model, dot.size) if (!is.null(p)) plot.list[[length(plot.list) + 1]] <- p p <- diag_norm(model, geom.colors) if (!is.null(p)) plot.list[[length(plot.list) + 1]] <- p p <- diag_ncv(model, dot.size, line.size) if (!is.null(p)) plot.list[[length(plot.list) + 1]] <- p plot.list } plot_diag_glm <- function(model, geom.colors, dot.size, line.size, ...) { geom.colors <- col_check2(geom.colors, 2) diag_reqq(model, dot.size) } #' @importFrom stats residuals fitted diag_ncv <- function(model, dot.size, line.size) { if (is.null(dot.size)) dot.size <- 1 if (is.null(line.size)) line.size <- 1 dat <- data.frame( res = stats::residuals(model), fitted = stats::fitted(model) ) ggplot(dat, aes_string(x = "fitted", y = "res")) + geom_intercept_line2(0, NULL) + geom_point(size = dot.size) + geom_smooth(method = "loess", se = FALSE, size = line.size) + labs( x = "Fitted values", y = "Residuals", title = "Homoscedasticity (constant variance of residuals)", subtitle = "Amount and distance of points scattered above/below line is equal or randomly spread" ) } #' @importFrom rlang .data #' @importFrom stats residuals sd diag_norm <- function(model, geom.colors) { res_ <- data.frame(res = stats::residuals(model)) ggplot(res_, aes_string(x = "res")) + geom_density(fill = geom.colors[1], alpha = 0.2) + stat_function( fun = dnorm, args = list( mean = mean(unname(stats::residuals(model)), na.rm = TRUE), sd = stats::sd(unname(stats::residuals(model)), na.rm = TRUE) ), colour = geom.colors[2], size = 0.8 ) + labs( x = "Residuals", y = "Density", title = "Non-normality of residuals", subtitle = "Distribution should look like normal curve" ) } #' @importFrom stats residuals rstudent fitted diag_qq <- function(model, geom.colors, dot.size, line.size, ...) 
{ if (is.null(dot.size)) dot.size <- 1 if (is.null(line.size)) line.size <- 1 # qq-plot of studentized residuals if (inherits(model, c("lme", "lmerMod", "glmmTMB"))) { res_ <- sort(stats::residuals(model), na.last = NA) y_lab <- "Residuals" } else { # else, normal model res_ <- sort(stats::rstudent(model), na.last = NA) y_lab <- "Studentized Residuals" } fitted_ <- sort(stats::fitted(model), na.last = NA) # create data frame mydf <- stats::na.omit(data.frame(x = fitted_, y = res_)) # plot it ggplot(mydf, aes_string(x = "x", y = "y")) + geom_point(size = dot.size) + scale_colour_manual(values = geom.colors) + stat_smooth(method = "lm", se = FALSE, size = line.size) + labs( title = "Non-normality of residuals and outliers", subtitle = "Dots should be plotted along the line", y = y_lab, x = "Theoretical quantiles (predicted values)" ) } #' @importFrom purrr map map_dbl #' @importFrom stats qnorm ppoints diag_reqq <- function(model, dot.size) { if (!is_merMod(model) && !inherits(model, "glmmTMB")) return(NULL) if (!requireNamespace("lme4", quietly = TRUE)) { stop("Package 'lme4' required for this function to work, please install it.") } if (!requireNamespace("glmmTMB", quietly = TRUE)) { stop("Package 'glmmTMB' required for this function to work, please install it.") } if (inherits(model, "glmmTMB")) { re <- glmmTMB::ranef(model)[[1]] s1 <- TMB::sdreport(model$obj, getJointPrecision = TRUE) s2 <- sqrt(s1$diag.cov.random) se <- purrr::map(re, function(.x) { cnt <- nrow(.x) * ncol(.x) s3 <- s2[1:cnt] s2 <- s2[-(1:cnt)] s3 }) } else { re <- lme4::ranef(model, condVar = T) se <- purrr::map(re, function(.x) { pv <- attr(.x, "postVar") cols <- seq_len(dim(pv)[1]) unlist(lapply(cols, function(.y) sqrt(pv[.y, .y, ]))) }) } alpha <- .3 if (is.null(dot.size)) dot.size <- 2 # get ...-arguments add.args <- lapply(match.call(expand.dots = F)$`...`, function(x) x) if ("alpha" %in% names(add.args)) alpha <- eval(add.args[["alpha"]]) purrr::map2(re, se, function(.re, .se) { ord <- unlist(lapply(.re, order)) + rep((0:(ncol(.re) - 1)) * nrow(.re), each = nrow(.re)) df.y <- unlist(.re)[ord] df.ci <- stats::qnorm(.975) * .se[ord] pDf <- data_frame( y = df.y, ci = df.ci, nQQ = rep(stats::qnorm(stats::ppoints(nrow(.re))), ncol(.re)), ID = factor(rep(rownames(.re), ncol(.re))[ord], levels = rownames(.re)[ord]), ind = gl(ncol(.re), nrow(.re), labels = names(.re)), conf.low = df.y - df.ci, conf.high = df.y + df.ci ) ggplot(pDf, aes_string( x = "nQQ", y = "y" )) + facet_wrap(~ ind, scales = "free") + labs(x = "Standard normal quantiles", y = "Random effect quantiles") + geom_intercept_line2(0, NULL) + stat_smooth(method = "lm", alpha = alpha) + geom_errorbar( aes_string(ymin = "conf.low", ymax = "conf.high"), width = 0, colour = "black" ) + geom_point(size = dot.size, colour = "darkblue") }) } #' @importFrom stats coef diag_vif <- function(fit) { if (is_merMod(fit) || inherits(fit, "lme")) return(NULL) if (!requireNamespace("car", quietly = TRUE)) stop("Package `car` needed for this function to work. Please install it.", call. = F) vifplot <- NULL # check if we have more than 1 term if (length(stats::coef(fit)) > 2) { # variance inflation factor # claculate VIF vifval <- car::vif(fit) if (is.matrix(vifval)) { val <- vifval[, 1] } else { val <- vifval } # retrieve highest VIF-value to determine y-axis range maxval <- val[which.max(val)] # determine upper limit of y-axis upperLimit <- 10 # check whether maxval exceeds the critical VIF-Limit # of 10. If so, set upper limit to max. 
value if (maxval >= upperLimit) upperLimit <- ceiling(maxval) mydat <- data.frame(vif = round(val, 2)) %>% rownames_as_column(var = "vars") vifplot <- ggplot(mydat, aes_string(x = "vars", y = "vif")) + geom_bar(stat = "identity", width = 0.7, fill = "#80acc8") + geom_hline(yintercept = 5, linetype = 2, colour = "darkgreen", alpha = 0.7) + geom_hline(yintercept = 10, linetype = 2, colour = "darkred", alpha = 0.7) + annotate("text", x = 1, y = 4.7, label = "good", size = 4, colour = "darkgreen") + annotate("text", x = 1, y = 9.7, label = "tolerable", size = 4, colour = "darkred") + labs(title = "Variance Inflation Factors (multicollinearity)", x = NULL, y = NULL) + scale_y_continuous(limits = c(0, upperLimit), expand = c(0, 0)) } vifplot } sjPlot/NEWS.md0000644000176200001440000005101614073077247012641 0ustar liggesusers# sjPlot 2.8.9 ## Bug fixes * Fixed issues from CRAN checks. # sjPlot 2.8.8 ## Changes to functions * `tab_model()` now works properly with forthcoming *parameters* update. ## Bug fixes * `plot_models()` did not work properly for Bayesian models. # sjPlot 2.8.7 ## Changes to functions * `tab_model()` also gains an `encoding` argument. * `tab_df()` and `tab_dfs()` no longer set the argument `show.rownames` to `TRUE`. Therefore, both functions now use row numbers as row names, if no other rownames are present. * `tab_dfs()` also gains a `digits` argument. ## Bug fixes * Argument `df.method` in `tab_model()` did not accept all available options that were documented. * Fix CRAN check issues (warnings in new R-devel). # sjPlot 2.8.6 ## Changes to functions * When `dv.labels = ""` in `tab_model()`, the row with names of dependent variables is omitted. ## Bug fixes * Fix CRAN check issues (removed cross-references to archived packages). * The `minus.sign` argument in `tab_model()` now works. * `show.std = TRUE` in `tab_model()` did not exponentiate standardized coefficients for non-Gaussian models. # sjPlot 2.8.5 ## Changes to functions * `tab_model()` gains an argument `df.method`, which will replace the less generic `p.val` argument in the future. Currently, `df.method` is an alias of `p.val`. ## Bug fixes * Fixed issue with wrong n's in `plot_stackfrq()` when weights were applied. * Fixed issue `plot_stackfrq()` when weights were applied and items should be sorted. * Fixed issue in `plot_models()` for models without intercept. * Fixed issue for wrong legend labelling in `plot_models()` when showing p-stars. * Fixed issue in `plot_model()` with `type = "int"` in detecting interaction terms when these were partly in parenthesis (like `a * (b + c)`). * Fixed issue in `tab_model()` with arguments `show.stat = TRUE` and `show.std = TRUE`, where the related statistic and CI columns for standardized coefficients were not shown. * Fixed issue in `tab_model()` for *brmsfit* models that did no longer show random effects information after the last update from the *performance* package. * Fixed issue with argument `show.rownames` in `tab_df()`. # sjPlot 2.8.4 ## Changes to functions * The robust estimation (argument `vcov.fun` in `tab_model()` or `plot_model()`) now also uses and thus accepts estimation-types from package *clubSandwich*. * `tab_model()` now accepts all options for `p.val` that are supported by `parameters::model_parameters()`. * The `p.style` argument in `tab_model()` was slightly revised, and now also accepts `"scientific"` as option for scientific notation of p-values. * `tab_model()` gets a `digits.re` argument to define decimal part of the random effects summary. 
* `plot_models()` gains `value.size` and `line.size` arguments, similar to `plot_model()`. * `plot_models()` should sort coefficients in their natural order now. ## Bug fixes * Fixed bug in `plot_xtab()` with wrong order of legend labels. * Fixed bug in `plot_models()` with wrong axis title for exponentiated coefficients. * Fixed bug in `tab_model()` that did not show standard error of standardized coefficients when `show.se = TRUE`. # sjPlot 2.8.3 ## General * `tab_model()` and `plot_model()` now support _clogit_ models (requires latest update of package **insight**). ## Changes to functions * `tab_model()` gets a `p.adjust` argument to adjust p-values for multiple comparisons. * `tab_model()`, `plot_model()` and `plot_models()` get a `robust`-argument to easily compute standard errors, confidence intervals and p-values based on robust estimation of the variance-covariance matrix. `robust` is just a convenient shortcut for `vcov.fun` and `vcov.type`. ## Bug fixes * Fixed issue in `tab_model()` and `plot_model()` for certain cases when coefficients could not be estimated and were `NA`. * Fixed issue in `tab_model()` with `collapse.ci` for Bayesian models. * Fixed issue in `tab_model()` when `p.val="kr"` and `show.df=TRUE`. * Fixed issue in `tab_model()` with formatting issues of p-values when standardized coefficients where requested. * Fixed issue in `tab_model()` due to changes in other packages *sjPlot* depends on. # sjPlot 2.8.2 ## Function renaming * `sjt.itemanalysis()` is now named `tab_itemscale()`. * `sjt.xtab()` is now named `tab_xtab()`. ## Changes to functions * Improved handling for `tab_model()` of robust estimation in general and Kenward-Roger or Satterthwaite approximations in particular for linear mixed models. * Revised code to cope with forthcoming *tidyselect*-update. ## Bug fixes * Improved `tab_df()` now uses value labels for factors instead of numeric values. * Fixed some issues related to the lates *brms*-update. # sjPlot 2.8.1 ## Changes to functions * `tab_model()` gets arguments `bootstrap`, `iterations` and `seed` to return bootstrapped estimates. ## Bug fixes * Fixed issue in `tab_model()` with detecting labels when `auto.label = TRUE`. * Fixed issue in `tab_model()` for negative binomial hurdle mixed models (i.e. *glmmTMB* models with truncated negative-binomial family). * Fixed bug in `tab_model()` with `show.reflvl = TRUE`. * Fixed bug in `tab_model()` where labels for coefficients where not matching the correct coefficients. # sjPlot 2.8.0 ## Breaking changes * Cluster functions have been removed, as these are now re-implemented in the **parameters** package. ## General * Standardization of model parameters (in `plot_model()` or `tab_model()`) now uses standardization based on refitting the model (see [vignette](https://easystats.github.io/effectsize/articles/standardize_parameters.html) for details). ## Changes to functions * `plot_model()` gets `type = "emm"` as marginal effects plot type, which is similar to `type = "eff"`. See [Plotting Marginal Effects of Regression Models](https://strengejacke.github.io/sjPlot/articles/plot_marginal_effects.html) for details. * The `verbose`-argument in `view_df()` now defaults to `FALSE`. * Updated and re-arranged internal color palette, especially to have a better behaviour when selecting colors from continuous palettes (see `show_pals()`). ## Bug fixes * `sort.est = NULL` in `plot_model()` now preserves original order of coefficients. 
* Fixed bug in automatic axis labelling for `plot_frq()` for non-labelled, numeric values. * Fixed bug in `plot_frq()` when plotting factors. * Arguments `string.std_ci` and `string.std_se` are no longer ignored in `tab_model()`. # sjPlot 2.7.2 ## General * Replaced `performance::principal_component()` by `parameters::principal_component()`. * Fixed CRAN check issues, due to the latest *bayestestR* update. ## Function renaming * `sjp.grpfrq()` is now names `plot_grpfrq()`. * `sjp.xtab()` is now names `plot_xtab()`. ## Changes to functions * `plot_grid()` gets a `tags`-argument to add tags to plot-panels. ## Bug fixes * Fixed bug in `plot_stackfrq()` for data frames with many missing values. * Fixed bug with sorting frequencies in `plot_frq()` when vector had more labels than values. * Fixed bug in `tab_model()` where `show.reflvl = TRUE` did not insert the reference category in first place, but in alphabetical order. # sjPlot 2.7.1 ## General * Minor revisions to meet the changes in the forthcoming update from tidyr. * new color palettes were added (see `show_sjplot_pals()`). ## Changes to functions * `tab_model()` now supports *gamlss* models. * `tab_df()` gets a `digits` argument, to round numeric values in output. ## Bug fixes * Fixed bug in `tab_model()` with `show.df = TRUE` for *lmerModLmerTest*. * Fixed bug in `tab_stackfrq()` when items had different amount of valid values. # sjPlot 2.7.0 ## Renamed functions * `sjp.stackfrq()` was renamed to `plot_stackfrq()`. * `sjt.stackfrq()` was renamed to `tab_stackfrq()`. ## Changes to functions ### `plot_likert()` * showed category labels in the top and bottom legends in two rows if there are more than six categories. Also, the categories are ordered column wise instead of row wise. This behaviour can now be controlled for grouped likert plots, using `group.legend.options`. The ordering now defaults to row wise and the user can force all categories onto a single row. * now automatically adjusts labels to avoid overlapping. ### `tab_model()` * now supports `wbm()`-models from the *panelr*-package. * gets a `show.aicc`-argument to show the second order AIC. * gets a `show.reflvl`-argument to show the reference level of factors. * gets a `string.std_se` and `string.std_ci`-argument to change the column header for standard errors and confidence intervals of standardized coefficients. * no longer prints a message that default p-values for mixed models are based on Wald approximation. * `show.ci50` defaults to `FALSE` now. ### `sjt.itemanalysis()` * `sjt.itemanalysis()` now works on ordered factors. A clearer error message was added when unordered factors are used. The old error message was not helpful. * The `factor.groups` argument can now be `"auto"` to detect factor groups based on a pca with Varimax rotation. ### `sjp.stackrq()` * `sjp.stackfrq()` was renamed to `plot_stackfrq()`. * `sjp.stackfrq()` (now named: `plot_stackfrq()`) gets a `show.n`-argument to also show count values. This option can be combined with `show.prc`. * `sjp.stackfrq()` (now named: `plot_stackfrq()`) now also works on grouped data frames. ### changes to other functions * `plot_model()` now supports `wbm()`-models from the *panelr*-package. ## Bug fixes * `plot_model(type = "int")` now also recognized interaction terms with `:` in formula. * Argument `string.est` in `tab_model()` did not overwrite the default label for the estimate-column-header. * Minor fix in `tab_model()` for mixed models that can't compute R2. 
* Fix issue in `tab_model()` when printing robust standard errors and CI (i.e. when using arguments `vcov*`). * The `plot_likert()` option `reverse.scale = TRUE` resulted in `values = "sum.inside"` being outside and the other way around. This is fixed now. * `view_df()` mixed up labels and frequency values when value labels were present, but no such values were in the data. * Argument `wrap.labels` in `plot_frq()` did not properly work for factor levels. * Fix issue in `plot_models()` that stopped for some models. * Fix issue in `sjt.stackfrq()`, when `show.na = TRUE` and some items had zero-values. # sjPlot 2.6.3 ## General * Export `dplyr::n()`, to meet changes in dplyr 0.8.0. * `plot_model()` and `tab_model()` now support `MixMod`-objects from package **GLMMadaptive**, `mlogit`- and `gmnl`-models. ## Renamed functions * `sjp.kfold_cv()` was renamed to `plot_kfold_cv()`. * `sjp.frq()` was renamed to `plot_frq()`. ## Changes to functions ### tab_model() * `tab_model()` gets a `show.ngrps`-argument, which adds back the functionality to print the number of random effects groups for mixed models. * `tab_model()` gets a `show.loglik`-argument, which adds back the functionality to print the model's log-Likelihood. * `tab_model()` gets a `strings`-argument, as convenient shortcut for setting column-header strings. * `tab_model()` gets additional arguments `vcov.fun`, `vcov.type` and `vcov.args` that are passed down to `sjstats::robust()`, to calculate different types of (clustered) robust standard errors. * The `p.style`-argument now also allows printing both numeric p-values and asterisks, by using `p.style = "both"`. ### plot_likert() * `plot_likert()` gets a `reverse.scale` argument to reverse the order of categories, so positive and negative values switch position. * `plot_likert()` gets a `groups` argument, to group items in the plot (thanks to @ndevln). * Argument `grid.range` in `plot_likert()` now may also be a vector of length 2, to define different lengths for the left and right x-axis scales. ### Other * `plot_frq()` (former `sjp.frq()`) now has pipe-consistent syntax, enables plotting multiple variables in one function call and supports grouped data frames. * `plot_model()` gets additional arguments `vcov.fun`, `vcov.type` and `vcov.args` that are passed down to `sjstats::robust()`, to calculate different types of (clustered) robust standard errors. * `sjt.xtab()`, `sjp.xtab()`, `plot_frq()` and `sjp.grpfrq()` get a `drop.empty()`-argument, to drop values / factor levels with no observations from output. ## Bug fixes * Legend labels were inverted for **brms**-models in `plot_model(..., type = "diag")`. * Legend labels were duplicated for marginal effects plots when `color ="bw"` and `legend.title` was specified. * Fixed encoding issues with help-files. * `view_df()` did not truncate frequency- and percentage-values for variables where value labels were truncated to a certain maximum number. * `tab_model()` did not print number of observations for `coxph`-models. # sjPlot 2.6.2 ## General * Revised some help-files and vignettes. ## Removed / Defunct Following functions are now defunct: * `sjt.lm()`, `sjt.glm()`, `sjt.lmer()` and `sjt.glmer()`. Please use `tab_model()` instead. ## Changes to functions * `tab_model()` supports printing simplex parameters of monotonic effects of **brms** models. * `tab_model()` gets a `prefix.labels`-argument to add a prefix to the labels of categorical terms. 
* The `rotation`-argument in `sjt.pca()` and `sjp.pca()` now supports all rotations from `psych::principal()`. ## Bug fixes * `plot_model()` no longer automatically changes the plot-type to `"slope"` for models with only one predictor that is categorical and has more than two levels. * `type = "eff"` and `type = "pred"` in `plot_model()` did not work when `terms` was not specified. * If robust standard errors are requested in `tab_model()`, the confidence intervals and p-values are now re-calculated and adjusted based on the robust standard errors. * `colors = "bw"` was not recognized correctly for `plot_model(..., type = "int")`. * Fix issue in `sjp.frq()` with correct axis labels for non-labelled character vectors. # sjPlot 2.6.1 ## General * Removed defunct functions. ## Deprecated * `sjt.lm()`, `sjt.glm()`, `sjt.lmer()` and `sjt.glmer()` are now deprecated. Please use `tab_model()` instead. ## Changes to functions * Arguments `dot.size` and `line.size` in `plot_model()` now also apply to marginal effects and diagnostic plots. * `plot_model()` now uses a free x-axis scale in facets for models with zero-inflated part. * `plot_model()` now shows multiple plots for models with zero-inflated parts when `grids = FALSE`. * `tab_model()` gets a `p.style` and `p.threshold` argument to indicate significance levels as asterisks, and to determine the threshold for which an estimate is considered as significant. * `plot_model()` and `plot_models()` get a `p.threshold` argument to determine the threshold for which an estimate is considered as significant. ## Bug fixes * Fixed bug from the last update that made value labels disappear for `plot_likert()`. * `tab_model()` now also accepts multiple model-objects stored in a `list` as argument, as stated in the help-file. * The `file`-argument now works again in `sjt.itemanalysis()`. * Argument `show.ci` in `tab_model()` did not compute confidence intervals for different levels. # sjPlot 2.6.0 ## General * `sjp.scatter()` was revised and renamed to `plot_scatter()`. `plot_scatter()` is pipe-friendly, and also works on grouped data frames. * `sjp.gpt()` was revised and renamed to `plot_gpt()`. `plot_gpt()` is pipe-friendly, and also works on grouped data frames. * Reduce package dependencies. ## Renamed functions * `sjp.scatter()` was renamed to `plot_scatter()`. * `sjp.likert()` was renamed to `plot_likert()`. * `sjp.gpt()` was renamed to `plot_gpt()`. * `sjp.resid()` was renamed to `plot_residuals()`. ## Changes to functions * Improved support for `brmsfit`-objects with categorical-family for `plot_model()` and `tab_model()`. * `tab_model()` gets a `show.adj.icc`-argument, to also show the adjusted ICC for mixed models. * `tab_model()` gets a `col.order`-argument, to reorder the table columns. * Argument `hide.progress` in `view_df()` is deprecated. Please use `verbose` now. * The `statistics`-argument in `sjt.xtab()` gets a `"fisher"`-option, to force Fisher's Exact Test to be used. ## Removed / Defunct Following functions are now defunct: * `sjp.lm()`, `sjp.glm()`, `sjp.lmer()`, `sjp.glmer()` and `sjp.int()`. Please use `plot_model()` instead. * `sjt.frq()`. Please use `sjmisc::frq(out = "v")` instead. ## Bug fixes * Due to changes in the _broom_ and _lmerTest_ packages, tidiers no longer worked for `lmerModLmerTest` objects. * Fix issue with standardized coefficient (argument `show.std`) in `tab_model()`. # sjPlot 2.5.0 ## New functions * `tab_model()` as replacement for `sjt.lm()`, `sjt.glm()`, `sjt.lmer()` and `sjt.glmer()`. 
  Furthermore, `tab_model()` is designed to work with the same model-objects as `plot_model()`.
* New colour scales for ggplot-objects: `scale_fill_sjplot()` and `scale_color_sjplot()`. These provide predefined colour palettes from this package.
* `show_sjplot_pals()` to show all predefined colour palettes provided by this package.
* `sjplot_pal()` to return the colour values of a specific palette.

## Deprecated

The following functions are now deprecated:

* `sjp.lm()`, `sjp.glm()`, `sjp.lmer()`, `sjp.glmer()` and `sjp.int()`. Please use `plot_model()` instead.
* `sjt.frq()`. Please use `sjmisc::frq(out = "v")` instead.

## Removed / Defunct

The following functions are now defunct:

* `sjt.grpmean()`, `sjt.mwu()` and `sjt.df()`. The replacements are `sjstats::grpmean()`, `sjstats::mwu()` and `tab_df()` resp. `tab_dfs()`.

## Changes to functions

* `plot_model()` and `plot_models()` get a `prefix.labels`-argument, to prefix automatically retrieved term labels with either the related variable name or label.
* `plot_model()` gets a `show.zeroinf`-argument to show or hide the zero-inflation part of models in the plot.
* `plot_model()` gets a `jitter`-argument to add some random variation to data points for those plot types that accept `show.data = TRUE`.
* `plot_model()` gets a `legend.title`-argument to define the legend title for plots that display a legend.
* `plot_model()` now passes more arguments in `...` down to `ggeffects::plot()` for marginal effects plots.
* `plot_model()` now plots the zero-inflated part of the model for `brmsfit`-objects.
* `plot_model()` now plots multivariate response models, i.e. models with multiple outcomes.
* Diagnostic plots in `plot_model()` (`type = "diag"`) can now also be used with `brmsfit`-objects.
* Axis limits of diagnostic plots in `plot_model()` (`type = "diag"`) for Stan-models (`brmsfit` or `stanreg` resp. `stanfit`) can now be set with the `axis.lim`-argument.
* The `grid.breaks`-argument for `plot_model()` and `plot_models()` now also takes a vector of values to directly define the grid breaks for the plot.
* Better default calculation of grid breaks in `plot_model()` and `plot_models()` when the `grid.breaks`-argument is of length one.
* The `terms`-argument for `plot_model()` now also allows the specification of a range of numeric values in square brackets for marginal effects plots, e.g. `terms = "age [30:50]"` or `terms = "age [pretty]"`.
* For coefficient-plots, the `terms`- and `rm.terms`-arguments for `plot_model()` now also allow the specification of factor levels for categorical terms. Coefficients for the indicated factor levels are kept resp. removed (see `?plot_model` for details).
* `plot_model()` now supports `clmm`-objects (package *ordinal*).
* `plot_model(type = "diag")` now also shows random-effects QQ-plots for `glmmTMB`-models, and also plots random-effects QQ-plots for all random effects (if the model has more than one random effect term).

## Bug fixes

* `plot_model(type = "re")` now supports standard errors and confidence intervals for `glmmTMB`-objects.
* Fixed typo for the `glmmTMB`-tidier, which may have returned wrong data for the zero-inflation part of the model.
* Multiple random intercepts for multilevel models fitted with `brms` are now shown in their own facet per intercept.
* Removed unnecessary warning in `sjp.likert()` for uneven category count when a neutral category is specified.
* `plot_model(type = "int")` could not automatically select `mdrt.values` properly for non-integer variables.
* `sjp.grpfrq()` now correctly uses the complete space in facets when `facet.grid = TRUE`. * `sjp.grpfrq(type = "boxplot")` did not correctly label the x-axis when one category had no elements in a vector. * Problems with German umlauts when printing HTML tables were fixed. sjPlot/MD50000644000176200001440000001721114150203412012027 0ustar liggesuserseb74aa5325069f5edc335953e1f16d9c *DESCRIPTION 1fe4ea87a95828d77eec0fb230715b70 *NAMESPACE b948cb07936e8dfabe6c46b2ed8d4e77 *NEWS.md c96eedf0dbd010dcdebfc07945caf6e7 *R/S3-methods.R d225ff1e4ec000885f62da9222ef5adb *R/color_utils.R 7524f0a7c1baf8c42633d5f671d4e3c9 *R/helpfunctions.R b387bf83b8af45b298ee5e8a1e191c81 *R/html_print.R 75b35f0e05c838bdecd4a5188aee1d36 *R/html_print_utils.R f82d3f7840225e66747af6febd3e2f9d *R/plot_diag_linear.R 541c4f829848be9f263a351d4bd3a940 *R/plot_diag_stan.R f82451b9fec1611a2cbab28c657c961a *R/plot_frq.R e5de0981cc6870900bbe044bf7a6ba6e *R/plot_gpt.R 8e6bb3b23becb1446af15ea0a748b31d *R/plot_grid.R a26d31733934872a049106d60b56ded3 *R/plot_grpfrq.R de28233976c04ff8deca904117334304 *R/plot_kfold_cv.R 5fd805e637609841989fb6e0bc8aa01e *R/plot_likert.R bdf82a3c38204bdfa83e8e90f66958f2 *R/plot_model.R dcd0d7b808c2c723a341abbfde85b822 *R/plot_model_estimates.R f964589c27b26def11bd4f22fc0bd6c3 *R/plot_models.R 0eb8d75b243c9af4a39df5f1772b8ac4 *R/plot_point_estimates.R 8d086e21b5c983ff7a2062cd7de69889 *R/plot_residuals.R 6cb2283b8dbcf917fd7f397318e3e863 *R/plot_scatter.R ad0e48f8102fa0dcee69a54b5aa22409 *R/plot_stackfrq.R ed97253e3d37b40eee2f48023b950c5c *R/plot_type_eff.R 637ae756a6c87816dd95444c32f4db47 *R/plot_type_est.R e2a5ca51c03d9fb22525d2a021107c98 *R/plot_type_int.R 28c33942fb77865e2cc30a7fe6481f13 *R/plot_type_ranef.R fcb66681b7a1672c21e1cbac2239ac2a *R/plot_type_slope.R 034b9f6964ab7f7c458f5df2ed61da84 *R/plot_xtab.R 5c54559b5979ea36dc1de28ca209cdb9 *R/save_plot.R 8d90486cabf6220a4d853e01c70dd3b1 *R/select_helpers.R 0920ede2ce30e114e3f370570e884ec8 *R/sjPlotAnova.R 2303569584703516acfcf2bbf471bed1 *R/sjPlotCorr.R 865bc7b1f2d533eb9f76a85a73c01702 *R/sjPlotDist.R fa0cf2208480a9af33cac63bab85b19d *R/sjPlotPearsonsChi2Test.R 0bde8107d61a4492b0388611e010c299 *R/sjPlotPolynomials.R c569fccdc72dc6f052dc83087be61807 *R/sjPlotSetTheme.R d10104f90274719e70397c5e9292f124 *R/sjplot.R 78a259f4aba07e9b95152c59e6013c1f *R/sjplot_themes.R 419c725b20f8788eaad9fac2968e460e *R/tab_corr.R ab510ea85f5b1391fc125be1c2ad27d1 *R/tab_fa.R ed344abb92c827f0c3170a59f1f09b78 *R/tab_itemscale.R 31d947cc9e1f3a28365d706145af301f *R/tab_model.R cf7b6e29731aba6d622fc60c2e272bc6 *R/tab_pca.R add9a3be6b9d1003ff11634db22330cb *R/tab_stackfrq.R eacf41c8eda2bc54efbf2d81e3a1f9c4 *R/tab_xtab.R b5cfac1906e61bb4ac78c34f92efe05a *R/tidiers.R 335048d8972b35c02d8ad0ab6bb8663d *R/utils.R d1e743beb535341845241e0fe385477f *R/view_df.R 27fa7d7eca17f1f826e994fb394f2241 *R/zzz.R 105f249ebe0abbe58c412340655a4c05 *README.md 223b9a52b703d07a1e4c68f702365b47 *build/vignette.rds 3172b22b3d87d0f86d78326bc26891fc *data/efc.RData be42ec0232aeaa088bacf501d1a0816f *inst/CITATION 33c02b3fd177bcfb937e29f89704ebb5 *inst/doc/blackwhitefigures.R ff11d0d00ced22544efb12e547900a01 *inst/doc/blackwhitefigures.Rmd d3c66d97ef5d99ec6f07595f93e32500 *inst/doc/blackwhitefigures.html 2e6bcc8e0d76b0ef1b2ab6aff2159475 *inst/doc/custplot.R adbcee55ed3c7ad97a40b8462fec8e40 *inst/doc/custplot.Rmd 38ef0ecc5cefbca2a4e5e98b428ebf5b *inst/doc/custplot.html c1f257be4cc130577dd58109636828d6 *inst/doc/plot_interactions.R c7d758d22442627c74960a059bfa4383 *inst/doc/plot_interactions.Rmd 
f3d18c67d6acc5a9cc591778954629f6 *inst/doc/plot_interactions.html 49fbc6936987732721dc6322a6bf2b11 *inst/doc/plot_likert_scales.R 46e18bf5a498c8942222d4913b8077ea *inst/doc/plot_likert_scales.Rmd f03bd46a82cea6eee0f6ff1f6ed289a1 *inst/doc/plot_likert_scales.html 9ec25a93ffffb2b4fb6637d83636be4a *inst/doc/plot_marginal_effects.R d948900dae52973ec2cbce09ae5fdaf6 *inst/doc/plot_marginal_effects.Rmd b3c5ad7097bf68680955ae712e9a6fe8 *inst/doc/plot_marginal_effects.html 7aa4c3c1c287d7b62427a071ee642b8f *inst/doc/plot_model_estimates.R ac747baa07bea86dd32ace94b6133000 *inst/doc/plot_model_estimates.Rmd 50d2af158011f3e3ca66135079f3b512 *inst/doc/plot_model_estimates.html 2e72bd3f47d820bb077dd01d15099635 *inst/doc/sjtitemanalysis.R 158637bc6d31e13d171a40cb5758977f *inst/doc/sjtitemanalysis.Rmd eacf67bf81bec74f9315363e5049791d *inst/doc/sjtitemanalysis.html 163f19a85c8ad05c8cb87a60b5dbcc70 *inst/doc/tab_bayes.R baae473ecd5dfcc85d990bb59cc46dce *inst/doc/tab_bayes.Rmd a5e2d2735f745b685d1beb271c500d92 *inst/doc/tab_bayes.html 6fb92209f8625f0ab4a89c2a73c9abed *inst/doc/tab_mixed.R 437f5f47e54f228117f8216ab10a5df2 *inst/doc/tab_mixed.Rmd 2d98aa6454a29dc7dbfa9cd61644aeed *inst/doc/tab_mixed.html 1a870aec4415bd1102d570fa9a28dff8 *inst/doc/tab_model_estimates.R ea09c571fd08b35a7bf6333460f8205f *inst/doc/tab_model_estimates.Rmd 40356318f4a1c32eb72f8e57e931510d *inst/doc/tab_model_estimates.html d06194f1cd64b496f74cc1b199be4b7e *inst/doc/tab_model_robust.R 927ae9c1d8a8b86e54bec6b8a59f2424 *inst/doc/tab_model_robust.Rmd 9a08490d84a46fc199b6ef28937e4caa *inst/doc/tab_model_robust.html 0710da8ac692aa6c7da297d4ba72c613 *inst/doc/table_css.R 0fc5796374d72fabe20ccb9a7ea7ee8d *inst/doc/table_css.Rmd 44f6ecce728e75c42b74fb8ee69e7b00 *inst/doc/table_css.html 305ee5938d9bf4c982eb8bd7d475da4e *man/dist_chisq.Rd a8879ebb43a34ce55e4a07620db19f5f *man/dist_f.Rd b720522e90832c162bba68335ac15182 *man/dist_norm.Rd f904e0334b6dc3edfb503ebcee23de4c *man/dist_t.Rd a266bdb43b55140bc7146a2dee5290ff *man/efc.Rd 30519380b57b8139720feca91b45f558 *man/figures/logo.png ad09cdfe20ad1608e361fa8b5558639d *man/plot_frq.Rd 4bb23bd40c67534f21ba316fd64aa7c8 *man/plot_gpt.Rd d2c1e08d93ce6b8a3fed4ef3da18f2c9 *man/plot_grid.Rd 45bb45351bffcf58dd247a6c9e7dabdf *man/plot_grpfrq.Rd 573e33de2a76cca57d0b4f35fa401dea *man/plot_kfold_cv.Rd d36a73a71099f09b7147ced539e8a509 *man/plot_likert.Rd 7f71406603bae352e01e50f487130452 *man/plot_model.Rd f973d31c567bb06d9e4cad2c35d4488f *man/plot_models.Rd 8f320344fcee75519974ea48b7498c6e *man/plot_residuals.Rd b96419254d6a8f31b1473324b1419a0e *man/plot_scatter.Rd 9a22a52ec01a33f3c180c6f62ab2f4e6 *man/plot_stackfrq.Rd 462bb0e71e78923dfabd01e5815f63f5 *man/plot_xtab.Rd 25aadd93594efbd63282d47467046666 *man/save_plot.Rd 61fc906066b94455cea71893a925b9df *man/set_theme.Rd cdc6ce4a547480536c56169e3e02187d *man/sjPlot-package.Rd 98d1863504b57bee7e9a1b1cc406f6e8 *man/sjPlot-themes.Rd 55cb9ebc59eeeaf4e8e2d36b03772b23 *man/sjp.aov1.Rd 1a3e6992e9a378d33c3025794d6dd867 *man/sjp.chi2.Rd 873f8a260c435153ac6b1e021134467f *man/sjp.corr.Rd aa7c834f0a65bb345ba2332d3c183cfa *man/sjp.poly.Rd 2151735c6933cd13802a5c4d1a9982f1 *man/sjplot.Rd 6d968eba2136d01bf8176bfd3fa67597 *man/tab_corr.Rd 22e5434b88957821450d0fedd297443c *man/tab_df.Rd 3f8a9110ac979a57872367e426a9db44 *man/tab_fa.Rd 57462d1b4232b031d7d123fe96699926 *man/tab_itemscale.Rd 670e5832894e0bbb8e667c97f0db58e7 *man/tab_model.Rd 00e4666cd9183a9d097c2fdcf66a2931 *man/tab_pca.Rd a293dc6c8ab521888dec4085b5c668c1 *man/tab_stackfrq.Rd 2e722a7a393faa741f7a7e52b6632b72 
*man/tab_xtab.Rd cecfc4c831a7a05aa17552221ae98d28 *man/view_df.Rd 661c247277a468106666cc00dd667a9d *tests/testthat.R ff8015c59c63ab60cb02b80c5bed1c0c *tests/testthat/test-plot_grpfrq.R 8d5d4a1472a27d590a50b245868498ac *tests/testthat/test-plot_model_std.R d438196e480a5cc3a3ff58496d701239 *tests/testthat/test-tab_model.R ff11d0d00ced22544efb12e547900a01 *vignettes/blackwhitefigures.Rmd adbcee55ed3c7ad97a40b8462fec8e40 *vignettes/custplot.Rmd c7d758d22442627c74960a059bfa4383 *vignettes/plot_interactions.Rmd 46e18bf5a498c8942222d4913b8077ea *vignettes/plot_likert_scales.Rmd d948900dae52973ec2cbce09ae5fdaf6 *vignettes/plot_marginal_effects.Rmd ac747baa07bea86dd32ace94b6133000 *vignettes/plot_model_estimates.Rmd 158637bc6d31e13d171a40cb5758977f *vignettes/sjtitemanalysis.Rmd baae473ecd5dfcc85d990bb59cc46dce *vignettes/tab_bayes.Rmd 437f5f47e54f228117f8216ab10a5df2 *vignettes/tab_mixed.Rmd ea09c571fd08b35a7bf6333460f8205f *vignettes/tab_model_estimates.Rmd 927ae9c1d8a8b86e54bec6b8a59f2424 *vignettes/tab_model_robust.Rmd 0fc5796374d72fabe20ccb9a7ea7ee8d *vignettes/table_css.Rmd sjPlot/inst/0000755000176200001440000000000014150131571012500 5ustar liggesuserssjPlot/inst/doc/0000755000176200001440000000000014150131571013245 5ustar liggesuserssjPlot/inst/doc/tab_model_robust.html0000644000176200001440000023373114150131567017475 0ustar liggesusers Robust Estimation of Standard Errors, Confidence Intervals and p-values

Robust Estimation of Standard Errors, Confidence Intervals and p-values

The tab_model() function also allows the computation of standard errors, confidence intervals and p-values based on robust covariance matrix estimation from model parameters. Robust estimation is based on the packages sandwich and clubSandwich, so all models supported by either of these packages work with tab_model().

Classical Regression Models

Robust Covariance Matrix Estimation from Model Parameters

There are three arguments that allow for choosing different methods and options of robust estimation: vcov.fun, vcov.type and vcov.args. Let us start with a simple example, which uses a heteroskedasticity-consistent covariance matrix estimation with estimation-type “HC3” (i.e. sandwich::vcovHC(type = "HC3") is called):

data(iris)
model <- lm(Petal.Length ~ Sepal.Length * Species + Sepal.Width, data = iris)

# model parameters, where SE, CI and p-values are based on robust estimation
tab_model(model, vcov.fun = "HC", show.se = TRUE)
  Petal Length
Predictors Estimates std. Error CI p
(Intercept) 0.87 0.45 -0.03 – 1.76 0.059
Sepal Length 0.04 0.12 -0.19 – 0.28 0.711
Species [versicolor] -0.78 0.69 -2.15 – 0.59 0.265
Species [virginica] -0.41 0.63 -1.66 – 0.83 0.513
Sepal Width 0.11 0.08 -0.05 – 0.27 0.190
Sepal Length * Species [versicolor] 0.61 0.13 0.35 – 0.87 <0.001
Sepal Length * Species [virginica] 0.68 0.12 0.45 – 0.91 <0.001
Observations 150
R2 / R2 adjusted 0.979 / 0.978

# compare standard errors to result from sandwich-package
unname(sqrt(diag(sandwich::vcovHC(model))))
#> [1] 0.45382603 0.11884474 0.69296611 0.63031982 0.08318559 0.13045539 0.11841325

Cluster-Robust Covariance Matrix Estimation (sandwich)

If another covariance matrix estimation is required, use the vcov.fun-argument. This argument takes the suffix of the related vcov*()-function as its value, i.e. vcov.fun = "CL" would call sandwich::vcovCL(), and vcov.fun = "HAC" would call sandwich::vcovHAC().

The specific estimation type can be changed with vcov.type. E.g., sandwich::vcovCL() accepts estimation types HC0 to HC3. In the next example, we use a clustered covariance matrix estimation with HC1-estimation type.

# change estimation-type
tab_model(model, vcov.fun = "CL", vcov.type = "HC1", show.se = TRUE)
  Petal Length
Predictors Estimates std. Error CI p
(Intercept) 0.87 0.42 0.03 – 1.70 0.042
Sepal Length 0.04 0.11 -0.18 – 0.26 0.692
Species [versicolor] -0.78 0.65 -2.07 – 0.51 0.237
Species [virginica] -0.41 0.59 -1.57 – 0.75 0.483
Sepal Width 0.11 0.08 -0.05 – 0.27 0.170
Sepal Length * Species [versicolor] 0.61 0.12 0.37 – 0.85 <0.001
Sepal Length * Species [virginica] 0.68 0.11 0.46 – 0.90 <0.001
Observations 150
R2 / R2 adjusted 0.979 / 0.978

# compare standard errors to result from sandwich-package
unname(sqrt(diag(sandwich::vcovCL(model))))
#> [1] 0.42197635 0.11148130 0.65274212 0.58720711 0.07934029 0.12251570 0.11058144

Usually, clustered covariance matrix estimation is used when there is a cluster-structure in the data. The variable indicating the cluster-structure can be defined in sandwich::vcovCL() with the cluster-argument. In tab_model(), additional arguments that should be passed down to functions from the sandwich package can be specified in vcov.args:

iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris)))
# change estimation-type, defining additional arguments
tab_model(
  model, 
  vcov.fun = "CL", 
  vcov.type = "HC1",
  vcov.args = list(cluster = iris$cluster),
  show.se = TRUE
)
  Petal Length
Predictors Estimates std. Error CI p
(Intercept) 0.87 0.34 0.20 – 1.53 0.011
Sepal Length 0.04 0.07 -0.10 – 0.19 0.540
Species [versicolor] -0.78 0.52 -1.80 – 0.25 0.137
Species [virginica] -0.41 0.26 -0.94 – 0.11 0.120
Sepal Width 0.11 0.07 -0.03 – 0.25 0.131
Sepal Length * Species [versicolor] 0.61 0.10 0.42 – 0.80 <0.001
Sepal Length * Species [virginica] 0.68 0.05 0.58 – 0.78 <0.001
Observations 150
R2 / R2 adjusted 0.979 / 0.978

# compare standard errors to result from sandwich-package
unname(sqrt(diag(sandwich::vcovCL(model, cluster = iris$cluster))))
#> [1] 0.33714287 0.07192334 0.51893777 0.26415406 0.07201145 0.09661348 0.05123446
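
The other sandwich estimators mentioned at the beginning of this section can be requested in the same way. For example, a heteroskedasticity- and autocorrelation-consistent (HAC) covariance matrix; this is a minimal sketch, assuming the sandwich package is installed (output not shown):

# HAC-estimation, calling sandwich::vcovHAC()
tab_model(model, vcov.fun = "HAC", show.se = TRUE)

# compare standard errors to result from sandwich-package
unname(sqrt(diag(sandwich::vcovHAC(model))))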

Cluster-Robust Covariance Matrix Estimation (clubSandwich)

Cluster-robust estimation of the variance-covariance matrix can also be achieved using clubSandwich::vcovCR(). Thus, when vcov.fun = "CR", the related function from the clubSandwich package is called. Note that this function requires the specification of the cluster-argument.

# create fake-cluster-variable, to demonstrate cluster robust standard errors
iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris)))

# cluster-robust estimation
tab_model(
  model, 
  vcov.fun = "CR", 
  vcov.type = "CR1",
  vcov.args = list(cluster = iris$cluster),
  show.se = TRUE
)
  Petal Length
Predictors Estimates std. Error CI p
(Intercept) 0.87 0.33 0.21 – 1.52 0.010
Sepal Length 0.04 0.07 -0.10 – 0.18 0.531
Species [versicolor] -0.78 0.51 -1.78 – 0.23 0.129
Species [virginica] -0.41 0.26 -0.92 – 0.10 0.112
Sepal Width 0.11 0.07 -0.03 – 0.25 0.123
Sepal Length * Species [versicolor] 0.61 0.09 0.42 – 0.79 <0.001
Sepal Length * Species [virginica] 0.68 0.05 0.58 – 0.78 <0.001
Observations 150
R2 / R2 adjusted 0.979 / 0.978

# compare standard errors to result from clubSandwich-package
unname(sqrt(diag(clubSandwich::vcovCR(model, type = "CR1", cluster = iris$cluster))))
#> [1] 0.33028501 0.07046034 0.50838200 0.25878087 0.07054666 0.09464825 0.05019229

Robust Covariance Matrix Estimation on Standardized Model Parameters

Finally, robust estimation can be combined with standardization. However, robust covariance matrix estimation only works for show.std = "std".

# model parameters, robust estimation on standardized model
tab_model(
  model, 
  show.std = "std",
  vcov.fun = "HC"
)
  Petal Length
Predictors Estimates std. Beta CI standardized CI p std. p
(Intercept) 0.87 -1.30 -0.03 – 1.76 -1.44 – -1.16 0.059 <0.001
Sepal Length 0.04 0.02 -0.19 – 0.28 -0.09 – 0.13 0.711 0.711
Species [versicolor] -0.78 1.57 -2.15 – 0.59 1.40 – 1.74 0.265 <0.001
Species [virginica] -0.41 2.02 -1.66 – 0.83 1.84 – 2.20 0.513 <0.001
Sepal Width 0.11 0.03 -0.05 – 0.27 -0.01 – 0.07 0.190 0.190
Sepal Length * Species [versicolor] 0.61 0.28 0.35 – 0.87 0.16 – 0.41 <0.001 <0.001
Sepal Length * Species [virginica] 0.68 0.32 0.45 – 0.91 0.21 – 0.43 <0.001 <0.001
Observations 150
R2 / R2 adjusted 0.979 / 0.978

Mixed Models

Robust Covariance Matrix Estimation for Mixed Models

For linear mixed models, which by definition have a clustered (“hierarchical” or multilevel) structure in the data, it is also possible to estimate a cluster-robust covariance matrix. This again relies on the clubSandwich package, so we define the same arguments as in the example above.

library(lme4)
data(iris)
set.seed(1234)
iris$grp <- as.factor(sample(1:3, nrow(iris), replace = TRUE))

# fit example model
model <- lme4::lmer(
  Sepal.Length ~ Species * Sepal.Width + Petal.Length + (1 | grp),
  data = iris
)

# normal model parameters, like from 'summary()'
tab_model(model)
  Sepal Length
Predictors Estimates CI p
(Intercept) 1.55 0.76 – 2.35 <0.001
Species [versicolor] 0.41 -0.67 – 1.50 0.454
Species [virginica] -0.41 -1.56 – 0.74 0.483
Sepal Width 0.66 0.44 – 0.89 <0.001
Petal Length 0.82 0.69 – 0.95 <0.001
Species [versicolor] * Sepal Width -0.48 -0.85 – -0.12 0.010
Species [virginica] * Sepal Width -0.36 -0.71 – -0.00 0.048
Random Effects
σ2 0.09
τ00 grp 0.01
ICC 0.07
N grp 3
Observations 150
Marginal R2 / Conditional R2 0.860 / 0.870

# model parameters, cluster robust estimation for mixed models
tab_model(
  model, 
  vcov.fun = "CR", 
  vcov.type = "CR1", 
  vcov.args = list(cluster = iris$grp)
)
  Sepal Length
Predictors Estimates CI p
(Intercept) 1.55 0.76 – 2.35 <0.001
Species [versicolor] 0.41 -1.17 – 1.99 0.608
Species [virginica] -0.41 -0.78 – -0.03 0.033
Sepal Width 0.66 0.46 – 0.86 <0.001
Petal Length 0.82 0.72 – 0.91 <0.001
Species [versicolor] * Sepal Width -0.48 -1.18 – 0.21 0.172
Species [virginica] * Sepal Width -0.36 -0.57 – -0.15 0.001
Random Effects
σ2 0.09
τ00 grp 0.01
ICC 0.07
N grp 3
Observations 150
Marginal R2 / Conditional R2 0.860 / 0.870

Robust Covariance Matrix Estimation on Standardized Mixed Model Parameters

Robust estimation can also be combined with standardization for linear mixed models; again, this only works with show.std = "std".

# model parameters, cluster robust estimation on standardized mixed model
tab_model(
  model, 
  show.std = "std",
  vcov.fun = "CR", 
  vcov.type = "CR1", 
  vcov.args = list(cluster = iris$grp)
)
  Sepal Length
Predictors Estimates std. Beta CI standardized CI p std. p
(Intercept) 1.55 0.97 0.76 – 2.35 0.82 – 1.12 <0.001 <0.001
Species [versicolor] 0.41 -1.29 -1.17 – 1.99 -1.95 – -0.63 0.608 <0.001
Species [virginica] -0.41 -1.81 -0.78 – -0.03 -2.26 – -1.37 0.033 <0.001
Sepal Width 0.66 0.35 0.46 – 0.86 0.24 – 0.45 <0.001 <0.001
Petal Length 0.82 1.74 0.72 – 0.91 1.54 – 1.94 <0.001 <0.001
Species [versicolor] * Sepal Width -0.48 -0.25 -1.18 – 0.21 -0.62 – 0.11 0.172 0.172
Species [virginica] * Sepal Width -0.36 -0.19 -0.57 – -0.15 -0.30 – -0.08 0.001 0.001
Random Effects
σ2 0.09
τ00 grp 0.01
ICC 0.07
N grp 3
Observations 150
Marginal R2 / Conditional R2 0.860 / 0.870
sjPlot/inst/doc/tab_model_robust.R0000644000176200001440000002162314150131566016724 0ustar liggesusersparams <- list(EVAL = TRUE) ## ----message=FALSE, warning=FALSE, include=FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sandwich", quietly = TRUE) || !requireNamespace("lme4", quietly = TRUE) || !requireNamespace("clubSandwich", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) library(dplyr) } set.seed(333) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- data(iris) model <- lm(Petal.Length ~ Sepal.Length * Species + Sepal.Width, data = iris) # model parameters, where SE, CI and p-values are based on robust estimation tab_model(model, vcov.fun = "HC", show.se = TRUE) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovHC(model)))) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # change estimation-type tab_model(model, vcov.fun = "CL", vcov.type = "HC1", show.se = TRUE) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovCL(model)))) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris))) # change estimation-type, defining additional arguments tab_model( model, vcov.fun = "CL", vcov.type = "HC1", vcov.args = list(cluster = iris$cluster), show.se = TRUE ) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovCL(model, cluster = iris$cluster)))) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # create fake-cluster-variable, to demonstrate cluster robust standard errors iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris))) # cluster-robust estimation tab_model( model, vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$cluster), show.se = TRUE ) # compare standard errors to result from clubSsandwich-package unname(sqrt(diag(clubSandwich::vcovCR(model, type = "CR1", cluster = iris$cluster)))) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # model parameters, robust estimation on standardized model tab_model( model, show.std = "std", vcov.fun = "HC" ) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(lme4) data(iris) set.seed(1234) iris$grp <- as.factor(sample(1:3, nrow(iris), replace = TRUE)) # fit example model model <- lme4::lmer( Sepal.Length ~ Species * Sepal.Width + Petal.Length + (1 | grp), data = iris ) # normal model parameters, like from 'summary()' tab_model(model) # model parameters, cluster robust estimation for mixed models tab_model( model, vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$grp) ) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # model parameters, cluster robust estimation on standardized mixed model tab_model( model, show.std = "std", vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$grp) ) sjPlot/inst/doc/blackwhitefigures.R0000644000176200001440000000302314150131377017074 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------- library(sjPlot) library(sjmisc) library(sjlabelled) library(ggplot2) theme_set(theme_bw()) data(efc) plot_grpfrq(efc$e42dep, efc$c172code, geom.colors = "gs") ## ----------------------------------------------------------------------------- # create binrary response y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1) # create data frame for fitting model df <- data.frame( y = to_factor(y), sex = to_factor(efc$c161sex), dep = to_factor(efc$e42dep), barthel = efc$barthtot, education = to_factor(efc$c172code) ) # set variable label for response set_label(df$y) <- "High Negative Impact" # fit model fit <- glm(y ~., data = df, family = binomial(link = "logit")) # plot marginal 
effects plot_model( fit, type = "pred", terms = c("barthel", "sex","dep"), colors = "bw", ci.lvl = NA ) ## ----------------------------------------------------------------------------- # plot coefficients plot_model(fit, colors = "black") sjPlot/inst/doc/tab_mixed.Rmd0000644000176200001440000000702613612122336015653 0ustar liggesusers--- title: "Summary of Mixed Models as HTML Table" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("lme4", quietly = TRUE) || !requireNamespace("glmmTMB", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` This vignette shows examples for using `tab_model()` to create HTML tables for mixed models. Basically, `tab_model()` behaves in a very similar way for mixed models as for other, simple regression models, as shown [in this vignette](tab_model_estimates.html). ```{r, results='hide', message=FALSE, warning=FALSE} # load required packages library(sjPlot) library(lme4) data("sleepstudy") data("efc") efc$cluster <- as.factor(efc$e15relat) ``` ## Mixed models summaries as HTML table Unlike tables for [non-mixed models](tab_model_estimates.html), `tab_models()` adds additional information on the random effects to the table output for mixed models. You can hide these information with `show.icc = FALSE` and `show.re.var = FALSE`. Furthermore, the R-squared values are marginal and conditional R-squared statistics, based on _Nakagawa et al. 2017_. ```{r} m1 <- lmer(neg_c_7 ~ c160age + c161sex + e42dep + (1 | cluster), data = efc) m2 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) tab_model(m1, m2) ``` The marginal R-squared considers only the variance of the fixed effects, while the conditional R-squared takes both the fixed and random effects into account. The p-value is a simple approximation, based on the t-statistics and using the normal distribution function. A more precise p-value can be computed using `p.val = "kr"`. In this case, which only applies to linear mixed models, the computation of p-values is based on conditional F-tests with Kenward-Roger approximation for the degrees of freedom (using the using the **pbkrtest**-package). Note that here the computation is more time consuming and thus not used as default. You can also display the approximated degrees of freedom with `show.df`. ```{r} tab_model(m1, p.val = "kr", show.df = TRUE) ``` ## Generalized linear mixed models `tab_model()` can also print and combine models with different link-functions. ```{r} data("efc") efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) efc$cluster <- as.factor(efc$e15relat) m3 <- glmer( neg_c_7d ~ c160age + c161sex + e42dep + (1 | cluster), data = efc, family = binomial(link = "logit") ) tab_model(m1, m3) ``` ## More complex models Finally, an example from the **glmmTMB**-package to show how easy it is to print zero-inflated generalized linear mixed models as HTML table. 
```{r} library(glmmTMB) data("Salamanders") m4 <- glmmTMB( count ~ spp + mined + (1 | site), ziformula = ~ spp + mined, family = truncated_poisson(link = "log"), data = Salamanders ) tab_model(m1, m3, m4, show.ci = FALSE) ``` ## References Nakagawa S, Johnson P, Schielzeth H (2017) _The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisted and expanded._ J. R. Soc. Interface 14. doi: 10.1098/rsif.2017.0213 sjPlot/inst/doc/plot_model_estimates.R0000644000176200001440000003431514150131447017614 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE) if (!requireNamespace("sjlabelled", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) } ## ----results='hide'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(sjPlot) library(sjlabelled) library(sjmisc) library(ggplot2) data(efc) theme_set(theme_sjplot()) ## ----results='hide'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # create binary response y <- ifelse(efc$neg_c_7 < 
median(na.omit(efc$neg_c_7)), 0, 1) # create data frame for fitting model df <- data.frame( y = to_factor(y), sex = to_factor(efc$c161sex), dep = to_factor(efc$e42dep), barthel = efc$barthtot, education = to_factor(efc$c172code) ) # set variable label for response set_label(df$y) <- "High Negative Impact" # fit model m1 <- glm(y ~., data = df, family = binomial(link = "logit")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m1) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m1, vline.color = "red") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m1, sort.est = TRUE) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- summary(m1) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m1, order.terms = c(6, 7, 1, 2, 3, 4, 5)) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m1, transform = NULL) plot_model(m1, transform = "plogis") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m1, show.values = TRUE, value.offset = .3) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- data(iris) m2 <- lm(Sepal.Length ~ Sepal.Width + Petal.Length + Species, data = iris) # variable names as labels, but made "human readable" # separating dots are removed plot_model(m2) # to use variable names even for labelled data plot_model(m1, axis.labels = "", title = "my own title") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # keep only coefficients sex2, dep2 and dep3 plot_model(m1, terms = c("sex2", "dep2", "dep3")) # remove coefficients sex2, dep2 and dep3 plot_model(m1, rm.terms = c("sex2", "dep2", "dep3")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(m2, type = "std") ## 
----results='hide'----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- if (require("rstanarm", quietly = TRUE)) { # make sure we apply a nice theme library(ggplot2) theme_set(theme_sjplot()) data(mtcars) m <- stan_glm(mpg ~ wt + am + cyl + gear, data = mtcars, chains = 1) # default model plot_model(m) # same model, with mean point estimate, dot-style for point estimate # and different inner/outer probabilities of the HDI plot_model( m, bpe = "mean", bpe.style = "dot", prob.inner = .4, prob.outer = .8 ) } ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model( m1, colors = "Accent", show.values = TRUE, value.offset = .4, value.size = 4, dot.size = 3, line.size = 1.5, vline.color = "blue", width = 1.5 ) sjPlot/inst/doc/custplot.html0000644000176200001440000052775314150131403016025 0ustar liggesusers Customize Plot Appearance

Customize Plot Appearance

Daniel Lüdecke

2021-11-26

This vignette shows how the plots created by the sjp.* and plot_model() functions of the sjPlot package can be customized.

The examples refer to plot_grpfrq(), but most arguments are similar across all plotting functions of the sjPlot package.

Tweaking plot appearance

The base function to globally change theme options for all sjPlot plotting functions is set_theme(). Except for geom-colors and geom-sizes, all theme-options can be set via this function. The new theme is then applied to all following plots created with the sjPlot package.

There are various arguments to change the colors, sizes, angles etc. of labels. The following example shows changes to colors, sizes, angles, geom-outlines and the theme.

# load libraries
library(sjPlot)  # for plotting
library(sjmisc)  # for sample data
library(ggplot2) # to access ggplot-themes

# load sample data set
data(efc)

set_theme(
  geom.outline.color = "antiquewhite4", 
  geom.outline.size = 1, 
  geom.label.size = 2,
  geom.label.color = "grey50",
  title.color = "red", 
  title.size = 1.5, 
  axis.angle.x = 45, 
  axis.textcolor = "blue", 
  base = theme_bw()
)

plot_grpfrq(
  efc$e42dep, 
  efc$e16sex, 
  title = NULL, 
  geom.colors = c("cadetblue", "coral"), 
  geom.size = 0.4
)

Using the Color Brewer palettes

All plotting functions support the Color Brewer palettes. To apply a Color Brewer palette, simply specify the palette as geom.colors. Any valid Color Brewer palette is recognized automatically.

# blank theme
set_theme(
  base = theme_blank(),
  axis.title.size = .9,
  axis.textsize = .9,
  legend.size = .7,
  legend.title.size = .8,
  geom.label.size = 3
)

plot_grpfrq(
  efc$e42dep, 
  efc$e15relat, 
  geom.colors = "PuRd", 
  show.values = FALSE
)

An overview of all supported color codes can be obtained with display.brewer.all() from the RColorBrewer package.

library(RColorBrewer)
display.brewer.all()

Plot with flipped coordinates

The plot’s axes can be flipped using coord.flip = TRUE. If needed, labels can be placed inside the bars with the vjust or hjust arguments. In such cases, you might need to adjust the label colors with geom.label.color = "white".

set_theme(geom.label.color = "white", geom.label.size = 3)

# labels appear very large due to export metrics
plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE)
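
As mentioned above, the value labels can also be placed inside the bars. This is a minimal sketch for the flipped plot, assuming the hjust-argument accepts the position keywords known from ggplot2, such as "inward":

# place value labels inside the (flipped) bars
plot_grpfrq(
  efc$e42dep,
  efc$e16sex,
  coord.flip = TRUE,
  hjust = "inward"
)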

Adding plot margins

Plots with no margins towards the axes may look strange to some people (not to me, though). To restore the ggplot-default behaviour, use the expand.grid argument:

plot_grpfrq(efc$e42dep, efc$e16sex, expand.grid = TRUE)

Theme options

You can use any pre-defined theme from ggplot, like theme_bw(), theme_classic() or theme_minimal() as default theme.

set_theme(base = theme_light())
plot_frq(efc$e42dep)

Pre-defined themes

There is a set of pre-defined themes from the sjPlot-package. See ?"sjPlot-themes" for a complete list.

library(sjmisc)
data(efc)
efc <- to_factor(efc, e42dep, c172code)
m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc)

# reset theme
set_theme(base = theme_grey())

# forest plot of regression model
p <- plot_model(m)

# default theme
p

# pre-defined theme
p + theme_sjplot()

Pre-defined scales

There are also new scales for ggplot-objects, scale_color_sjplot() and scale_fill_sjplot().

p + 
  theme_sjplot2() + 
  scale_color_sjplot("simply")

To see all currently available color sets, use show_sjplot_pals().

show_sjplot_pals()
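
The colour values behind these palettes can also be retrieved with sjplot_pal() and, for instance, passed to geom.colors. A small sketch, reusing the "simply" palette shown above (the number of colours returned depends on the palette):

# colour values of the "simply" palette
sjplot_pal("simply")

# use the first two colours for a grouped frequency plot
plot_grpfrq(efc$e42dep, efc$e16sex, geom.colors = sjplot_pal("simply")[1:2])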

Set up own themes based on existing themes

If you want to use a specific theme as the base for building your own theme, use the base argument. When using base instead of theme, further arguments for setting colors etc. are not ignored.

set_theme(base = theme_bw(), axis.linecolor = "darkgreen")
plot_frq(efc$e42dep)

Further customization options

Each plotting function invisibly returns the ggplot-object. You can further add options to customize the appearance of the plot, like in the following example, where the count axis is hidden (color set to white):

set_theme(
  base = theme_classic(),
  axis.tickslen = 0, # hides tick marks
  axis.title.size = .9,
  axis.textsize = .9,
  legend.size = .7,
  legend.title.size = .8,
  geom.label.size = 3.5
)
  
plot_grpfrq(
  efc$e42dep,
  efc$e16sex,
  coord.flip = TRUE,
  show.axis.values = FALSE
) +
  theme(axis.line.x = element_line(color = "white"))

Plot legend

The plot’s legend can be customized via the various legend.-arguments, as the following examples show:

set_theme(
  base = theme_classic(), 
  legend.title.face = "italic", # title font face
  legend.inside = TRUE,         # legend inside plot
  legend.color = "grey50",      # legend label color
  legend.pos = "bottom right",  # legend position inside plot
  axis.title.size = .9,
  axis.textsize = .9,
  legend.size = .7,
  legend.title.size = .8,
  geom.label.size = 3
)

plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE)

set_theme(
  base = theme_classic(), 
  axis.linecolor = "white",     # "remove" axis lines
  axis.textcolor.y = "darkred", # set axis label text only for y axis
  axis.tickslen = 0,            # "remove" tick marks
  legend.title.color = "red",   # legend title color
  legend.title.size = 2,        # legend title size
  legend.color = "green",       # legend label color
  legend.pos = "top",           # legend position above plot
  axis.title.size = .9,
  axis.textsize = .9,
  legend.size = .7,
  geom.label.size = 3
)

plot_grpfrq(efc$e42dep, efc$e16sex)

sjPlot/inst/doc/plot_likert_scales.R0000644000176200001440000001625314150131421017253 0ustar liggesusers## ----set-options, echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 6, message = FALSE, warning = FALSE) options(width = 800, tibble.width = Inf) if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("parameters", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ## ----fig.height = 5.5--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(dplyr) library(sjPlot) library(sjmisc) library(parameters) data(efc) # find all variables from COPE-Index, which all have a "cop" in their # variable name, and then plot that subset as likert-plot mydf <- find_var(efc, pattern = "cop", out = "df") plot_likert(mydf) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_likert( mydf, grid.range = c(1.2, 1.4), expand.grid = FALSE, values = "sum.outside", show.prc.sign = TRUE ) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # Plot in groups plot_likert(mydf, groups = c(2, 1, 1, 1, 1, 2, 2, 2, 1)) ## ----fig.height = 6.5--------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- pca <- parameters::principal_components(mydf) groups <- parameters::closest_component(pca) plot_likert(mydf, groups = groups) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_likert( mydf, c(rep("B", 4), rep("A", 5)), sort.groups = FALSE, grid.range = c(0.9, 1.1), geom.colors = "RdBu", rel_heights = c(6, 8), wrap.labels = 40, reverse.scale = TRUE ) ## ----fig.height = 
5----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # control legend items six_cat_example = data.frame( matrix(sample(1:6, 600, replace = TRUE), ncol = 6) ) six_cat_example <- six_cat_example %>% dplyr::mutate_all( ~ ordered(., labels = c("+++", "++", "+", "-", "--", "---"))) # Old default plot_likert( six_cat_example, groups = c(1, 1, 1, 2, 2, 2), group.legend.options = list(nrow = 2, byrow = FALSE) ) # New default plot_likert(six_cat_example, groups = c(1, 1, 1, 2, 2, 2)) sjPlot/inst/doc/tab_bayes.R0000644000176200001440000001366514150131513015330 0ustar liggesusersparams <- list(EVAL = TRUE) ## ----echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("insight", quietly = TRUE) || !requireNamespace("httr", quietly = TRUE) || !requireNamespace("brms", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ## ---- results='hide', message=FALSE, warning=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # load required packages library(sjPlot) library(insight) library(httr) library(brms) # load sample models # zinb <- read.csv("http://stats.idre.ucla.edu/stat/data/fish.csv") # set.seed(123) # m1 <- brm(bf( # count ~ persons + child + camper + (1 | persons), # zi ~ child + camper + (1 | persons) # ), # data = zinb, # 
family = zero_inflated_poisson() # ) m1 <- insight::download_model("brms_zi_2") # data(epilepsy) # set.seed(123) # epilepsy$visit <- as.numeric(epilepsy$visit) # epilepsy$Base2 <- sample(epilepsy$Base, nrow(epilepsy), replace = TRUE) # f1 <- bf(Base ~ zAge + count + (1 |ID| patient)) # f2 <- bf(Base2 ~ zAge + Trt + (1 |ID| patient)) # m2 <- brm(f1 + f2 + set_rescor(FALSE), data = epilepsy) m2 <- insight::download_model("brms_mv_3") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m2) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m2, show.ci50 = TRUE) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, m2) sjPlot/inst/doc/plot_interactions.Rmd0000644000176200001440000001321414147735034017464 0ustar liggesusers--- title: "Plotting Interaction Effects of Regression Models" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plotting Interaction Effects of Regression Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r set-options, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 3.5, message = FALSE, warning = FALSE) options(width = 800, tibble.width = Inf) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This document describes how to plot marginal effects of interaction terms from various regression models, using the `plot_model()` function. `plot_model()` is a generic plot-function, which accepts many model-objects, like `lm`, `glm`, `lme`, `lmerMod` etc. `plot_model()` allows to create various plot tyes, which can be defined via the `type`-argument. The default is `type = "fe"`, which means that fixed effects (model coefficients) are plotted. To plot marginal effects of interaction terms, call `plot_model()` with: * `type = "pred"` to plot predicted values (marginal effects) for specific model terms, including interaction terms. * `type = "eff"`, which is similar to `type = "pred"`, however, discrete predictors are held constant at their proportions (not reference level). It internally calls \code{\link[effects]{Effect}} via \code{\link[ggeffects]{ggeffect}}. * `type = "emm"`, which is similar to `type = "eff"`. It internally calls \code{\link[emmeans]{emmeans}} via \code{\link[ggeffects]{ggemmeans}}. * `type = "int"` to plot marginal effects of interaction terms in a more convenient way. `plot_model()` supports [labelled data](https://cran.r-project.org/package=sjlabelled) and automatically uses variable and value labels to annotate the plot. This works with most regression modelling functions. ***Note:** For marginal effects plots, **sjPlot** calls functions from the [**ggeffects-package**](https://strengejacke.github.io/ggeffects/). 
If you need more flexibility when creating marginal effects plots, consider directly using the **ggeffects**-package.* # Two-Way-Interactions _Note: To better understand the principle of plotting interaction terms, it might be helpful to read the vignette on [marginal effects](plot_marginal_effects.html) first._ To plot marginal effects of interaction terms, at least two model terms need to be specified (the terms that define the interaction) in the `terms`-argument, for which the effects are computed. To plot marginal effects for three-way-interactions, all three terms need to be specified in `terms`. A convenient way to automatically plot interactions is `type = "int"`, which scans the model formula for interaction terms and then uses these as `terms`-argument. ```{r} library(sjPlot) library(sjmisc) library(ggplot2) data(efc) theme_set(theme_sjplot()) # make categorical efc$c161sex <- to_factor(efc$c161sex) # fit model with interaction fit <- lm(neg_c_7 ~ c12hour + barthtot * c161sex, data = efc) plot_model(fit, type = "pred", terms = c("barthtot", "c161sex")) ``` For `type = "int"`, no terms need to be specified. Note that this plot type automatically uses the first interaction term in the formula for the x-axis, while the second term is used as grouping factor. Furthermore, if continuous variables are used as second term, you can specify preset-values for this term with the `mdrt.values`-argument, which are then used as grouping levels. In this example, the second term is a factor with two levels (male/female), so there is no need for choosing specific values for the moderator. ```{r} plot_model(fit, type = "int") ``` To switch the terms, in this example _barthtot_ and _c161sex_, simply switch the order of these terms on the `terms`-argument and use `type = "pred"`. ```{r} plot_model(fit, type = "pred", terms = c("c161sex", "barthtot [0, 100]")) ``` To switch the terms for plot-type `type = "int"`, you need to re-fit the model and change the formula accordingly, i.e. using _c161sex_ as first term in the interaction. ```{r} # fit model with interaction, switching terms in formula fit <- lm(neg_c_7 ~ c12hour + c161sex * barthtot, data = efc) plot_model(fit, type = "int") ``` By default, for continuous variables, the minimum and maximum values are chosen as grouping levels, which are 0 and 100 - that's why the previous two plots are identical. You have other options as well, e.g. the mean-value and +/- 1 standard deviation (as suggested by Cohen and Cohen for continuous variables and popularized by Aiken and West 1991), which can be specified using `mdrt.values`. ```{r} plot_model(fit, type = "int", mdrt.values = "meansd") ``` # Three-Way-Interactions Since the `terms`-argument accepts up to three model terms, you can also compute marginal effects for a 3-way-interaction. ```{r} # fit model with 3-way-interaction fit <- lm(neg_c_7 ~ c12hour * barthtot * c161sex, data = efc) # select only levels 30, 50 and 70 from continuous variable Barthel-Index plot_model(fit, type = "pred", terms = c("c12hour", "barthtot [30,50,70]", "c161sex")) ``` Again, `type = "int"` will automatically plot the interaction terms, however, using `mdrt.values = "minmax"` as default - in this case, the "levels" 0 and 100 from continuous variable _barthtot_ are chosen by default. ```{r} plot_model(fit, type = "int") ``` # References Aiken and West (1991). 
_Multiple Regression: Testing and Interpreting Interactions._ sjPlot/inst/doc/tab_model_estimates.Rmd0000644000176200001440000002606714073077247017745 0ustar liggesusers--- title: "Summary of Regression Models as HTML Table" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Summary of Regression Models as HTML Table} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE) if (!requireNamespace("sjlabelled", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("lme4", quietly = TRUE) || !requireNamespace("pscl", quietly = TRUE) || !requireNamespace("glmmTMB", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) } ``` `tab_model()` is the pendant to `plot_model()`, however, instead of creating plots, `tab_model()` creates HTML-tables that will be displayed either in your IDE's viewer-pane, in a web browser or in a knitr-markdown-document (like this vignette). HTML is the only output-format, you can't (directly) create a LaTex or PDF output from `tab_model()` and related table-functions. However, it is possible to easily export the tables into Microsoft Word or Libre Office Writer. This vignette shows how to create table from regression models with `tab_model()`. There's a dedicated vignette that demonstrate how to change the [table layout and appearance with CSS](table_css.html). **Note!** Due to the custom CSS, the layout of the table inside a knitr-document differs from the output in the viewer-pane and web browser! ```{r} # load package library(sjPlot) library(sjmisc) library(sjlabelled) # sample data data("efc") efc <- as_factor(efc, c161sex, c172code) ``` ## A simple HTML table from regression results First, we fit two linear models to demonstrate the `tab_model()`-function. ```{r, results='hide'} m1 <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) m2 <- lm(neg_c_7 ~ c160age + c12hour + c161sex + e17age, data = efc) ``` The simplest way of producing the table output is by passing the fitted model as parameter. By default, estimates, confidence intervals (_CI_) and p-values (_p_) are reported. As summary, the numbers of observations as well as the R-squared values are shown. ```{r} tab_model(m1) ``` ## Automatic labelling As the **sjPlot**-packages features [labelled data](https://strengejacke.github.io/sjlabelled/), the coefficients in the table are already labelled in this example. The name of the dependent variable(s) is used as main column header for each model. For non-labelled data, the coefficient names are shown. ```{r} data(mtcars) m.mtcars <- lm(mpg ~ cyl + hp + wt, data = mtcars) tab_model(m.mtcars) ``` If factors are involved and `auto.label = TRUE`, "pretty" parameters names are used (see [`format_parameters()`](https://easystats.github.io/parameters/reference/format_parameters.html). ```{r} set.seed(2) dat <- data.frame( y = runif(100, 0, 100), drug = as.factor(sample(c("nonsense", "useful", "placebo"), 100, TRUE)), group = as.factor(sample(c("control", "treatment"), 100, TRUE)) ) pretty_names <- lm(y ~ drug * group, data = dat) tab_model(pretty_names) ``` ### Turn off automatic labelling To turn off automatic labelling, use `auto.label = FALSE`, or provide an empty character vector for `pred.labels` and `dv.labels`. 
```{r} tab_model(m1, auto.label = FALSE) ``` Same for models with non-labelled data and factors. ```{r} tab_model(pretty_names, auto.label = FALSE) ``` ## More than one model `tab_model()` can print multiple models at once, which are then printed side-by-side. Identical coefficients are matched in a row. ```{r} tab_model(m1, m2) ``` ## Generalized linear models For generalized linear models, the ouput is slightly adapted. Instead of _Estimates_, the column is named _Odds Ratios_, _Incidence Rate Ratios_ etc., depending on the model. The coefficients are in this case automatically converted (exponentiated). Furthermore, pseudo R-squared statistics are shown in the summary. ```{r} m3 <- glm( tot_sc_e ~ c160age + c12hour + c161sex + c172code, data = efc, family = poisson(link = "log") ) efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) m4 <- glm( neg_c_7d ~ c161sex + barthtot + c172code, data = efc, family = binomial(link = "logit") ) tab_model(m3, m4) ``` ### Untransformed estimates on the linear scale To plot the estimates on the linear scale, use `transform = NULL`. ```{r} tab_model(m3, m4, transform = NULL, auto.label = FALSE) ``` ## More complex models Other models, like hurdle- or zero-inflated models, also work with `tab_model()`. In this case, the zero inflation model is indicated in the table. Use `show.zeroinf = FALSE` to hide this part from the table. ```{r} library(pscl) data("bioChemists") m5 <- zeroinfl(art ~ fem + mar + kid5 + ment | kid5 + phd + ment, data = bioChemists) tab_model(m5) ``` You can combine any model in one table. ```{r} tab_model(m1, m3, m5, auto.label = FALSE, show.ci = FALSE) ``` ## Show or hide further columns `tab_model()` has some argument that allow to show or hide specific columns from the output: * `show.est` to show/hide the column with model estimates. * `show.ci` to show/hide the column with confidence intervals. * `show.se` to show/hide the column with standard errors. * `show.std` to show/hide the column with standardized estimates (and their standard errors). * `show.p` to show/hide the column with p-values. * `show.stat` to show/hide the column with the coefficients' test statistics. * `show.df` for linear mixed models, when p-values are based on degrees of freedom with Kenward-Rogers approximation, these degrees of freedom are shown. ### Adding columns In the following example, standard errors, standardized coefficients and test statistics are also shown. ```{r} tab_model(m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE) ``` ### Removing columns In the following example, default columns are removed. ```{r} tab_model(m3, m4, show.ci = FALSE, show.p = FALSE, auto.label = FALSE) ``` ### Removing and sorting columns Another way to remove columns, which also allows to reorder the columns, is the `col.order`-argument. This is a character vector, where each element indicates a column in the output. The value `"est"`, for instance, indicates the estimates, while `"std.est"` is the column for standardized estimates and so on. By default, `col.order` contains all possible columns. All columns that should shown (see previous tables, for example using `show.se = TRUE` to show standard errors, or `show.st = TRUE` to show standardized estimates) are then printed by default. Colums that are _excluded_ from `col.order` are _not shown_, no matter if the `show*`-arguments are `TRUE` or `FALSE`. So if `show.se = TRUE`, but`col.order` does not contain the element `"se"`, standard errors are not shown. 
On the other hand, if `show.est = FALSE`, but `col.order` _does include_ the element `"est"`, the columns with estimates are not shown. In summary, `col.order` can be used to _exclude_ columns from the table and to change the order of colums. ```{r} tab_model( m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE, col.order = c("p", "stat", "est", "std.se", "se", "std.est") ) ``` ### Collapsing columns With `collapse.ci` and `collapse.se`, the columns for confidence intervals and standard errors can be collapsed into one column together with the estimates. Sometimes this table layout is required. ```{r} tab_model(m1, collapse.ci = TRUE) ``` ## Defining own labels There are different options to change the labels of the column headers or coefficients, e.g. with: * `pred.labels` to change the names of the coefficients in the _Predictors_ column. Note that the length of `pred.labels` must exactly match the amount of predictors in the _Predictor_ column. * `dv.labels` to change the names of the model columns, which are labelled with the variable labels / names from the dependent variables. * Further more, there are various `string.*`-arguments, to change the name of column headings. ```{r} tab_model( m1, m2, pred.labels = c("Intercept", "Age (Carer)", "Hours per Week", "Gender (Carer)", "Education: middle (Carer)", "Education: high (Carer)", "Age (Older Person)"), dv.labels = c("First Model", "M2"), string.pred = "Coeffcient", string.ci = "Conf. Int (95%)", string.p = "P-Value" ) ``` ## Including reference level of categorical predictors By default, for categorical predictors, the variable names and the categories for regression coefficients are shown in the table output. ```{r} library(glmmTMB) data("Salamanders") model <- glm( count ~ spp + Wtemp + mined + cover, family = poisson(), data = Salamanders ) tab_model(model) ``` You can include the reference level for categorical predictors by setting `show.reflvl = TRUE`. ```{r} tab_model(model, show.reflvl = TRUE) ``` To show variable names, categories and include the reference level, also set `prefix.labels = "varname"`. ```{r} tab_model(model, show.reflvl = TRUE, prefix.labels = "varname") ``` ## Style of p-values You can change the style of how p-values are displayed with the argument `p.style`. With `p.style = "stars"`, the p-values are indicated as `*` in the table. ```{r} tab_model(m1, m2, p.style = "stars") ``` Another option would be scientific notation, using `p.style = "scientific"`, which also can be combined with `digits.p`. ```{r} tab_model(m1, m2, p.style = "scientific", digits.p = 2) ``` ### Automatic matching for named vectors Another way to easily assign labels are _named vectors_. In this case, it doesn't matter if `pred.labels` has more labels than coefficients in the model(s), or in which order the labels are passed to `tab_model()`. The only requirement is that the labels' names equal the coefficients names as they appear in the `summary()`-output. 
```{r} # example, coefficients are "c161sex2" or "c172code3" summary(m1) pl <- c( `(Intercept)` = "Intercept", e17age = "Age (Older Person)", c160age = "Age (Carer)", c12hour = "Hours per Week", barthtot = "Barthel-Index", c161sex2 = "Gender (Carer)", c172code2 = "Education: middle (Carer)", c172code3 = "Education: high (Carer)", a_non_used_label = "We don't care" ) tab_model( m1, m2, m3, m4, pred.labels = pl, dv.labels = c("Model1", "Model2", "Model3", "Model4"), show.ci = FALSE, show.p = FALSE, transform = NULL ) ``` ## Keep or remove coefficients from the table Using the `terms`- or `rm.terms`-argument allows us to explicitly show or remove specific coefficients from the table output. ```{r} tab_model(m1, terms = c("c160age", "c12hour")) ``` Note that the names of terms to keep or remove should match the coefficients names. For categorical predictors, one example would be: ```{r} tab_model(m1, rm.terms = c("c172code2", "c161sex2")) ``` sjPlot/inst/doc/custplot.Rmd0000644000176200001440000001514414147735034015605 0ustar liggesusers--- title: "Customize Plot Appearance" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Customize Plot Appearance} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This vignette shows how the plots created by the `sjp.*` and `plot_model()` functions of the **sjPlot** package can be customized. The examples refer to `plot_grpfrq()`, but most arguments are similar across all plotting function of the **sjPlot** package. ## Tweaking plot appearance The base function to globally change theme option for all sjp-function is `set_theme()`. Except for geom-colors and geom-sizes, all theme-options can be set via this function. This new theme will be applied to all following plots created with the **sjPlot** package. There are various arguments to change colors, sizes, angles etc. of labels. Following example show changes to colors, sizes, angles, geom-outlines and theme. ```{r} # load libraries library(sjPlot) # for plotting library(sjmisc) # for sample data library(ggplot2) # to access ggplot-themes # load sample data set data(efc) set_theme( geom.outline.color = "antiquewhite4", geom.outline.size = 1, geom.label.size = 2, geom.label.color = "grey50", title.color = "red", title.size = 1.5, axis.angle.x = 45, axis.textcolor = "blue", base = theme_bw() ) plot_grpfrq( efc$e42dep, efc$e16sex, title = NULL, geom.colors = c("cadetblue", "coral"), geom.size = 0.4 ) ``` ## Using the Color Brewer palettes All plotting functions support the usage of the [Colorbrewer]( https://colorbrewer2.org/) palettes. To apply a color brewer palette, use specify the palette as `geom.colors`. Any valid color brewer palette is recognized automatically. ```{r} # blank theme set_theme( base = theme_blank(), axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) plot_grpfrq( efc$e42dep, efc$e15relat, geom.colors = "PuRd", show.values = FALSE ) ``` An overview of all supported color codes can be obtained with `display.brewer.all()` from the `RColorBrewer` package. 
```{r, eval=FALSE} library(RColorBrewer) display.brewer.all() ``` ## Plot with flipped coordinates The plot's axes can be flipped using `coord.flip = TRUE`. If needed, labels can be placed inside the bars with the `vjust` or `hjust` arguments. In such cases, you might need to adjust the label colors with `geom.label.color = "white"`. ```{r} set_theme(geom.label.color = "white", geom.label.size = 3) # labels appear very large due to export metrics plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE) ``` ## Adding plot margins Plots with no margins towards the axes may look strange to some people (not to me, though). To restore the ggplot-default behaviour, use the `expand.grid` argument: ```{r results='hide', echo=FALSE} set_theme( axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) ``` ```{r} plot_grpfrq(efc$e42dep, efc$e16sex, expand.grid = TRUE) ``` ## Theme options You can use any pre-defined theme from ggplot, like `theme_bw()`, `theme_classic()` or `theme_minimal()` as default theme. ```{r} set_theme(base = theme_light()) plot_frq(efc$e42dep) ``` ## Pre-defined themes There is a set of pre-defined themes from the sjPlot-package. See `?"sjPlot-themes"` for a complete list. ```{r} library(sjmisc) data(efc) efc <- to_factor(efc, e42dep, c172code) m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc) # reset theme set_theme(base = theme_grey()) # forest plot of regression model p <- plot_model(m) # default theme p # pre-defined theme p + theme_sjplot() ``` ## Pre-defined scales There is also a new scale for **ggplot**-objects, `scale_color_sjplot()` and `scale_fill_sjplot()`. ```{r} p + theme_sjplot2() + scale_color_sjplot("simply") ``` To see all currently available color sets, use `show_sjplot_pals()`. ```{r} show_sjplot_pals() ``` ## Set up own themes based on existing themes If you want to use a specific theme as base for building your own theme, use the `base` argument. When using `base` instead of `theme`, further arguments for settings colors etc. are not ignored. ```{r} set_theme(base = theme_bw(), axis.linecolor = "darkgreen") plot_frq(efc$e42dep) ``` ## Further customization options Each plotting function invisibly returns the ggplot-object. 
You can further add options to customize the appearance of the plot, like in the following example, where the count axis is hidden (color set to white): ```{r} set_theme( base = theme_classic(), axis.tickslen = 0, # hides tick marks axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3.5 ) plot_grpfrq( efc$e42dep, efc$e16sex, coord.flip = TRUE, show.axis.values = FALSE ) + theme(axis.line.x = element_line(color = "white")) ``` ## Plot legend The plot's legend can be customized via various `legend.`-arguments, see following examples: ```{r} set_theme( base = theme_classic(), legend.title.face = "italic", # title font face legend.inside = TRUE, # legend inside plot legend.color = "grey50", # legend label color legend.pos = "bottom right", # legend position inside plot axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE) ``` ```{r} set_theme( base = theme_classic(), axis.linecolor = "white", # "remove" axis lines axis.textcolor.y = "darkred", # set axis label text only for y axis axis.tickslen = 0, # "remove" tick marks legend.title.color = "red", # legend title color legend.title.size = 2, # legend title size legend.color = "green", # legend label color legend.pos = "top", # legend position above plot axis.title.size = .9, axis.textsize = .9, legend.size = .7, geom.label.size = 3 ) plot_grpfrq(efc$e42dep, efc$e16sex) ``` sjPlot/inst/doc/table_css.html0000644000176200001440000013056114150131571016100 0ustar liggesusers Customizing HTML tables

Customizing HTML tables

Daniel Lüdecke

2021-11-26

All tab_*-functions create an HTML page with the table output. By default, this table is opened in the viewer pane of your IDE (if you are using an IDE that supports a viewer pane). If no viewer pane is available, the HTML output is saved as a temporary file and opened in your default web browser. The temporary files are deleted after your R session ends.

Copying table output to office or word processors

Export table as HTML file to open in word processors

You can save the HTML page as a file for further use by specifying the file-argument. The saved HTML file can then be opened in word processors like LibreOffice or Microsoft Office.
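
For example, a table can be written directly to a file like this (a minimal sketch; the file name is only an illustration):

library(sjPlot)
data(efc)
m <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc)
tab_model(m, file = "barthel_model.html")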

Drag and drop from browser or RStudio viewer pane

You can directly drag and drop a table from the RStudio viewer pane or browser into your word processor. Simply select the complete table with your mouse and drag it into your office document.

Customizing table output with the CSS parameter

The table output is in HTML format. The table style (visual appearance) is formatted using Cascading Style Sheets (CSS). If you are a bit familiar with these topics, you can easily customize the appearance of the table output.

Many table elements (header, row, column, cell, summary row, first row or column…) have CSS-class attributes, which can be used to change the table style. Since each sjt.* function as well as tab_model() has different table elements and thus different class attributes, you first need to know which styles can be customized.

Retrieving customizable styles

The table functions invisibly return several values. The return value page.style contains the style information for the HTML table. You can print this style sheet to the console using the cat()-function:

library(sjPlot)
data(efc)
m <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc)
tab <- tab_model(m)
cat(tab$page.style)
#> <style>
#> html, body { background-color: white; }
#> table { border-collapse:collapse; border:none; }
#> caption { font-weight: bold; text-align:left; }
#> td {  }
#> .thead { border-top: double; text-align:center; font-style:normal; font-weight:bold; padding:0.2cm; }
#> .tdata { padding:0.2cm; text-align:left; vertical-align:top; }
#> .arc { background-color:#f2f2f2; }
#> .summary { padding-top:0.1cm; padding-bottom:0.1cm; }
#> .summarydata { text-align:left; }
#> .fixedparts { font-weight:bold; text-align:left; }
#> .randomparts { font-weight:bold; text-align:left; padding-top:.8em; }
#> .zeroparts { font-weight:bold; text-align:left; padding-top:.8em; }
#> .simplexparts { font-weight:bold; text-align:left; padding-top:.8em; }
#> .lasttablerow { border-bottom: double; }
#> .firsttablerow {  }
#> .firstsumrow { border-top:1px solid; }
#> .labelcellborder { border-bottom:1px solid; }
#> .depvarhead { text-align:center; border-bottom:1px solid; font-style:italic; font-weight:normal; }
#> .depvarheadnodv { border-top: double; text-align:center; border-bottom:1px solid; font-style:italic; font-weight:normal; }
#> .leftalign { text-align:left; }
#> .centeralign { text-align:center; }
#> .firsttablecol { text-align:left; }
#> .footnote { font-style:italic; border-top:double black; text-align:right; }
#> .subtitle { font-weight: normal; }
#> .modelcolumn1 {  }
#> .modelcolumn2 {  }
#> .modelcolumn3 {  }
#> .modelcolumn4 {  }
#> .modelcolumn5 {  }
#> .modelcolumn6 {  }
#> .modelcolumn7 {  }
#> .col1 {  }
#> .col2 {  }
#> .col3 {  }
#> .col4 {  }
#> .col5 {  }
#> .col6 {  }
#> </style>

The HTML code is in the page.content return value. The following code prints the HTML code of the table to the R console:

cat(tab$page.content)
#> <table>
#> <tr>
#> <th class="thead firsttablerow firsttablecol col1">&nbsp;</th>
#> <th colspan="3" class="thead firsttablerow">Total score BARTHEL INDEX</th>
#> </tr>
#> <tr>
#> <td class="depvarhead firsttablerow firsttablecol col1">Predictors</td>
#> <td class="depvarhead firsttablerow col2">Estimates</td>
#> <td class="depvarhead firsttablerow col3">CI</td>
#> <td class="depvarhead firsttablerow col4">p</td>
#> </tr>
#> <tr>
#> <td class="tdata firsttablecol col1">(Intercept)</td>
#> <td class="tdata centeralign modelcolumn1 col2">90.06</td>
#> <td class="tdata centeralign modelcolumn1 col3">77.95&nbsp;&ndash;&nbsp;102.18</td>
#> <td class="tdata centeralign modelcolumn1 col4"><strong>&lt;0.001</strong></td>
#> </tr>
#> <tr>
#> <td class="tdata firsttablecol col1">carer'age</td>
#> <td class="tdata centeralign modelcolumn1 col2">&#45;0.22</td>
#> <td class="tdata centeralign modelcolumn1 col3">&#45;0.36&nbsp;&ndash;&nbsp;-0.08</td>
#> <td class="tdata centeralign modelcolumn1 col4"><strong>0.002</strong></td>
#> </tr>
#> <tr>
#> <td class="tdata firsttablecol col1">average number of hours<br>of care per week</td>
#> <td class="tdata centeralign modelcolumn1 col2">&#45;0.28</td>
#> <td class="tdata centeralign modelcolumn1 col3">&#45;0.31&nbsp;&ndash;&nbsp;-0.24</td>
#> <td class="tdata centeralign modelcolumn1 col4"><strong>&lt;0.001</strong></td>
#> </tr>
#> <tr>
#> <td class="tdata firsttablecol col1">carer's gender</td>
#> <td class="tdata centeralign modelcolumn1 col2">&#45;0.26</td>
#> <td class="tdata centeralign modelcolumn1 col3">&#45;4.36&nbsp;&ndash;&nbsp;3.83</td>
#> <td class="tdata centeralign modelcolumn1 col4">0.900</td>
#> </tr>
#> <tr>
#> <td class="tdata firsttablecol col1">carer's level of<br>education</td>
#> <td class="tdata centeralign modelcolumn1 col2">&#45;0.76</td>
#> <td class="tdata centeralign modelcolumn1 col3">&#45;3.55&nbsp;&ndash;&nbsp;2.02</td>
#> <td class="tdata centeralign modelcolumn1 col4">0.592</td>
#> </tr>
#> <tr>
#> <td class="tdata leftalign summary firstsumrow">Observations</td>
#> <td class="tdata summary summarydata firstsumrow" colspan="3">821</td>
#> </tr>
#> <tr>
#> <td class="tdata leftalign summary">R<sup>2</sup> / R<sup>2</sup> adjusted</td>
#> <td class="tdata summary summarydata" colspan="3">0.270 / 0.266</td>
#> </tr>
#> 
#> </table>

Now you can see which table elements are associated with which CSS class attributes.
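
For example, the cells of the first table column carry the class firsttablecol. As shown in the next section, such an element can be addressed by prefixing the class name with css. in the CSS argument (the chosen style here is only an illustration):

tab_model(m, CSS = list(css.firsttablecol = 'font-style: italic;'))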

Customizing table output with the CSS parameter

You can customize the table output with the CSS parameter. This parameter requires a list of attributes, which follow a certain pattern:

  1. each attribute needs a css. prefix
  2. followed by the class name (e.g. caption, thead, centeralign, etc.)
  3. equal-sign
  4. the CSS format (in (single) quotation marks)
  5. the CSS format must end with a semicolon (;)

Example:

tab_model(
  m,
  CSS = list(
    css.depvarhead = 'color: red;',
    css.centeralign = 'text-align: left;', 
    css.firsttablecol = 'font-weight: bold;', 
    css.summary = 'color: blue;'
  )
)
                                           Total score BARTHEL INDEX
Predictors                                 Estimates    CI                p
(Intercept)                                90.06        77.95 – 102.18    <0.001
carer’age                                  -0.22        -0.36 – -0.08     0.002
average number of hours of care per week   -0.28        -0.31 – -0.24     <0.001
carer’s gender                             -0.26        -4.36 – 3.83      0.900
carer’s level of education                 -0.76        -3.55 – 2.02      0.592
Observations                               821
R2 / R2 adjusted                           0.270 / 0.266

In the above example, the header row lost the original style and just became red. If you want to keep the original style and just add additional style information, use the plus-sign (+) as initial character for the parameter attributes. In the following example, the header row keeps its original style and is additionally printed in red:

tab_model(m, CSS = list(css.depvarhead = '+color: red;'))
                                           Total score BARTHEL INDEX
Predictors                                 Estimates    CI                p
(Intercept)                                90.06        77.95 – 102.18    <0.001
carer’age                                  -0.22        -0.36 – -0.08     0.002
average number of hours of care per week   -0.28        -0.31 – -0.24     <0.001
carer’s gender                             -0.26        -4.36 – 3.83      0.900
carer’s level of education                 -0.76        -3.55 – 2.02      0.592
Observations                               821
R2 / R2 adjusted                           0.270 / 0.266

Pre-defined Table-Layouts

There are a few pre-defined CSS themes, which can be accessed with the css_theme()-function. More pre-defined themes are planned for the future.

tab_model(m, CSS = css_theme("cells"))
                                           Total score BARTHEL INDEX
Predictors                                 Estimates    CI                p
(Intercept)                                90.06        77.95 – 102.18    <0.001
carer’age                                  -0.22        -0.36 – -0.08     0.002
average number of hours of care per week   -0.28        -0.31 – -0.24     <0.001
carer’s gender                             -0.26        -4.36 – 3.83      0.900
carer’s level of education                 -0.76        -3.55 – 2.02      0.592
Observations                               821
R2 / R2 adjusted                           0.270 / 0.266
sjPlot/inst/doc/blackwhitefigures.Rmd0000644000176200001440000000470414147735034017432 0ustar liggesusers--- title: "Black & White Figures for Print Journals" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Black & White Figures for Print Journals} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ``` This document shows examples how to create b/w figures, e.g. if you don't want colored figures for print-journals. ## Barplots in grey-scaled colors There are two ways to create plots in black and white or greyscale. For bar plots, `geom.colors = "gs"` creates a plot using a greyscale (based on `scales::grey_pal()`). ```{r} library(sjPlot) library(sjmisc) library(sjlabelled) library(ggplot2) theme_set(theme_bw()) data(efc) plot_grpfrq(efc$e42dep, efc$c172code, geom.colors = "gs") ``` ## Lineplots in b/w with different linetypes Similar to barplots, lineplots - mostly from `plot_model()` - can be plotted in greyscale as well (with `colors = "gs"`). However, in most cases lines colored in greyscale are difficult to distinguish. In this case, `plot_model()` supports black & white figures with different linetypes. Use `colors = "bw"` to create a b/w-plot. ```{r} # create binrary response y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1) # create data frame for fitting model df <- data.frame( y = to_factor(y), sex = to_factor(efc$c161sex), dep = to_factor(efc$e42dep), barthel = efc$barthtot, education = to_factor(efc$c172code) ) # set variable label for response set_label(df$y) <- "High Negative Impact" # fit model fit <- glm(y ~., data = df, family = binomial(link = "logit")) # plot marginal effects plot_model( fit, type = "pred", terms = c("barthel", "sex","dep"), colors = "bw", ci.lvl = NA ) ``` Different linetypes do not apply to all linetyped plots, if these usually only plot a single line - so there's no need for different linetypes, and you can just set `colors = "black"` (or `colors = "bw"`). 
```{r} # plot coefficients plot_model(fit, colors = "black") ``` sjPlot/inst/doc/tab_mixed.R0000644000176200001440000001356314150131531015330 0ustar liggesusersparams <- list(EVAL = TRUE) ## ----echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("lme4", quietly = TRUE) || !requireNamespace("glmmTMB", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ## ---- results='hide', message=FALSE, warning=FALSE---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # load required packages library(sjPlot) library(lme4) data("sleepstudy") data("efc") efc$cluster <- as.factor(efc$e15relat) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- m1 <- lmer(neg_c_7 ~ c160age + c161sex + e42dep + (1 | cluster), data = efc) m2 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy) tab_model(m1, m2) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, p.val = "kr", show.df = TRUE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- data("efc") efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) efc$cluster <- as.factor(efc$e15relat) m3 <- glmer( neg_c_7d ~ c160age + c161sex + e42dep + (1 | cluster), data = efc, family = binomial(link = "logit") ) tab_model(m1, m3) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(glmmTMB) data("Salamanders") m4 <- glmmTMB( count ~ spp + mined + (1 | site), ziformula = ~ spp + mined, family = truncated_poisson(link = "log"), data = Salamanders ) tab_model(m1, m3, m4, show.ci = FALSE) sjPlot/inst/doc/plot_model_estimates.Rmd0000644000176200001440000002127614073077247020152 0ustar liggesusers--- title: "Plotting Estimates (Fixed Effects) of Regression Models" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plotting Estimates (Fixed Effects) of Regression Models} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 5, message = FALSE, warning = FALSE) if (!requireNamespace("sjlabelled", quietly = TRUE) || 
!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) } ``` This document describes how to plot estimates as forest plots (or dot whisker plots) of various regression models, using the `plot_model()` function. `plot_model()` is a generic plot-function, which accepts many model-objects, like `lm`, `glm`, `lme`, `lmerMod` etc. `plot_model()` allows to create various plot tyes, which can be defined via the `type`-argument. The default is `type = "fe"`, which means that fixed effects (model coefficients) are plotted. For mixed effects models, only fixed effects are plotted by default as well. ```{r results='hide'} library(sjPlot) library(sjlabelled) library(sjmisc) library(ggplot2) data(efc) theme_set(theme_sjplot()) ``` ## Fitting a logistic regression model First, we fit a model that will be used in the following examples. The examples work in the same way for any other model as well. ```{r results='hide'} # create binary response y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1) # create data frame for fitting model df <- data.frame( y = to_factor(y), sex = to_factor(efc$c161sex), dep = to_factor(efc$e42dep), barthel = efc$barthtot, education = to_factor(efc$c172code) ) # set variable label for response set_label(df$y) <- "High Negative Impact" # fit model m1 <- glm(y ~., data = df, family = binomial(link = "logit")) ``` ## Plotting estimates of generalized linear models The simplest function call is just passing the model object as argument. By default, estimates are sorted in descending order, with the highest effect at the top. ```{r} plot_model(m1) ``` The "neutral" line, i.e. the vertical intercept that indicates no effect (x-axis position 1 for most glm's and position 0 for most linear models), is drawn slightly thicker than the other grid lines. You can change the line color with the `vline.color`-argument. ```{r} plot_model(m1, vline.color = "red") ``` ## Sorting estimates By default, the estimates are sorted in the same order as they were introduced into the model. Use `sort.est = TRUE` to sort estimates in descending order, from highest to lowest value. ```{r} plot_model(m1, sort.est = TRUE) ``` Another way to sort estimates is to use the `order.terms`-argument. This is a numeric vector, indicating the order of estimates in the plot. In the summary, we see that "sex2" is the first term, followed by the three dependency-categories (position 2-4), the Barthel-Index (5) and two levels for intermediate and high level of education (6 and 7). ```{r} summary(m1) ``` Now we want the educational levels (6 and 7) first, than gender (1), followed by dependency (2-4)and finally the Barthel-Index (5). Use this order as numeric vector for the `order.terms`-argument. ```{r} plot_model(m1, order.terms = c(6, 7, 1, 2, 3, 4, 5)) ``` ## Estimates on the untransformed scale By default, `plot_model()` automatically exponentiates coefficients, if appropriate (e.g. for models with log or logit link). You can explicitley prevent transformation by setting the `transform`-argument to `NULL`, or apply any transformation by using a character vector with the function name. ```{r} plot_model(m1, transform = NULL) plot_model(m1, transform = "plogis") ``` ## Showing value labels By default, just the dots and error bars are plotted. 
Use `show.values = TRUE` to show the value labels with the estimates values, and use `show.p = FALSE` to suppress the asterisks that indicate the significance level of the p-values. Use `value.offset` to adjust the relative positioning of value labels to the dots and lines. ```{r} plot_model(m1, show.values = TRUE, value.offset = .3) ``` ## Labelling the plot As seen in the above examples, by default, the plotting-functions of **sjPlot** retrieve value and variable labels if the data is _labelled_, using the [sjlabelled-package](https://cran.r-project.org/package=sjlabelled). If the data is not labelled, the variable names are used. In such cases, use the arguments `title`, `axis.labels` and `axis.title` to annotate the plot title and axes. If you want variable names instead of labels, even for labelled data, use `""` as argument-value, e.g. `axis.labels = ""`, or set `auto.label` to `FALSE`. Furthermore, `plot_model()` applies case-conversion to all labels by default, using the [snakecase-package](https://cran.r-project.org/package=snakecase). This converts labels into human-readable versions. Use `case = NULL` to turn case-conversion off, or refer to the package-vignette of the **snakecase**-package for further options. ```{r} data(iris) m2 <- lm(Sepal.Length ~ Sepal.Width + Petal.Length + Species, data = iris) # variable names as labels, but made "human readable" # separating dots are removed plot_model(m2) # to use variable names even for labelled data plot_model(m1, axis.labels = "", title = "my own title") ``` ## Pick or remove specific terms from plot Use `terms` resp. `rm.terms` to select specific terms that should (not) be plotted. ```{r} # keep only coefficients sex2, dep2 and dep3 plot_model(m1, terms = c("sex2", "dep2", "dep3")) # remove coefficients sex2, dep2 and dep3 plot_model(m1, rm.terms = c("sex2", "dep2", "dep3")) ``` ## Standardized estimates For linear models, you can also plot standardized beta coefficients, using `type = "std"` or `type = "std2"`. These two options differ in the way how coefficients are standardized. `type = "std2"` plots standardized beta values, however, standardization follows Gelman's (2008) suggestion, rescaling the estimates by dividing them by two standard deviations instead of just one. ```{r} plot_model(m2, type = "std") ``` ## Bayesian models (fitted with Stan) `plot_model()` also supports stan-models fitted with the **rstanarm** or **brms** packages. However, there are a few differences compared to the previous plot examples. First, of course, there are no _confidence intervals_, but _uncertainty intervals_ - high density intervals, to be precise. Second, there's not just one interval range, but an _inner_ and _outer_ probability. By default, the inner probability is fixed to `.5` (50%), while the outer probability is specified via `ci.lvl` (which defaults to `.89` (89%) for Bayesian models). However, you can also use the arguments `prob.inner` and `prob.outer` to define the intervals boundaries. Third, the point estimate is by default the _median_, but can also be another value, like mean. This can be specified with the `bpe`-argument. 
```{r results='hide'} if (require("rstanarm", quietly = TRUE)) { # make sure we apply a nice theme library(ggplot2) theme_set(theme_sjplot()) data(mtcars) m <- stan_glm(mpg ~ wt + am + cyl + gear, data = mtcars, chains = 1) # default model plot_model(m) # same model, with mean point estimate, dot-style for point estimate # and different inner/outer probabilities of the HDI plot_model( m, bpe = "mean", bpe.style = "dot", prob.inner = .4, prob.outer = .8 ) } ``` ## Tweaking plot appearance There are several options to customize the plot appearance: * The `colors`-argument either takes the name of a valid [colorbrewer palette](https://colorbrewer2.org/) (see also the related [vignette](custplot.html)), `"bw"` or `"gs"` for black/white or greyscaled colors, or a string with a color name. * `value.offset` and `value.size` adjust the positioning and size of value labels, if shown. * `dot.size` and `line.size` change the size of dots and error bars. * `vline.color` changes the neutral "intercept" line. * `width`, `alpha` and `scale` are passed down to certain ggplot-geoms, like `geom_errorbar()` or `geom_density_ridges()`. ```{r} plot_model( m1, colors = "Accent", show.values = TRUE, value.offset = .4, value.size = 4, dot.size = 3, line.size = 1.5, vline.color = "blue", width = 1.5 ) ``` # References Gelman A (2008) _Scaling regression inputs by dividing by two standard deviations._ Statistics in Medicine 27: 2865–2873. sjPlot/inst/doc/blackwhitefigures.html0000644000176200001440000015622014150131377017647 0ustar liggesusers Black & White Figures for Print Journals

Black & White Figures for Print Journals

Daniel Lüdecke

2021-11-26

This document shows examples of how to create black-and-white (b/w) figures, e.g. if you don’t want colored figures for print journals.

Barplots in grey-scaled colors

There are two ways to create plots in black and white or greyscale. For bar plots, geom.colors = "gs" creates a plot using a greyscale (based on scales::grey_pal()).

library(sjPlot)
library(sjmisc)
library(sjlabelled)
library(ggplot2)
theme_set(theme_bw())
data(efc)
plot_grpfrq(efc$e42dep, efc$c172code, geom.colors = "gs")

Lineplots in b/w with different linetypes

Similar to barplots, lineplots - mostly from plot_model() - can be plotted in greyscale as well (with colors = "gs"). However, in most cases lines colored in greyscale are difficult to distinguish. In this case, plot_model() supports black & white figures with different linetypes. Use colors = "bw" to create a b/w-plot.

# create binary response
y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1)

# create data frame for fitting model
df <- data.frame(
  y = to_factor(y),
  sex = to_factor(efc$c161sex),
  dep = to_factor(efc$e42dep),
  barthel = efc$barthtot,
  education = to_factor(efc$c172code)
)

# set variable label for response
set_label(df$y) <- "High Negative Impact"

# fit model
fit <- glm(y ~., data = df, family = binomial(link = "logit"))

# plot marginal effects
plot_model(
  fit, 
  type = "pred", 
  terms = c("barthel", "sex","dep"), 
  colors = "bw",
  ci.lvl = NA
)
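
For comparison, the same plot could also be drawn in greyscale instead of using linetypes - a minimal sketch that simply swaps the colors-argument (as noted above, greyscale lines may be harder to distinguish):

# plot marginal effects in greyscale (sketch)
plot_model(
  fit, 
  type = "pred", 
  terms = c("barthel", "sex", "dep"), 
  colors = "gs",
  ci.lvl = NA
)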

Different linetypes are not applied to plots that only contain a single line - in that case there is no need to distinguish lines by linetype, and you can simply set colors = "black" (or colors = "bw").

# plot coefficients
plot_model(fit, colors = "black")

sjPlot/inst/doc/tab_model_robust.Rmd0000644000176200001440000001364113746367064017264 0ustar liggesusers--- title: "Robust Estimation of Standard Errors, Confidence Intervals and p-values" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r message=FALSE, warning=FALSE, include=FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sandwich", quietly = TRUE) || !requireNamespace("lme4", quietly = TRUE) || !requireNamespace("clubSandwich", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) library(dplyr) } set.seed(333) ``` The `tab_model()` function also allows the computation of standard errors, confidence intervals and p-values based on robust covariance matrix estimation from model parameters. Robust estimation is based on the packages **sandwich** and **clubSandwich**, so all models supported by either of these packages work with `tab_model()`. ## Classical Regression Models ### Robust Covariance Matrix Estimation from Model Parameters There are three arguments that allow for choosing different methods and options of robust estimation: `vcov.fun`, `vcov.type` and `vcov.args`. Let us start with a simple example, which uses a heteroskedasticity-consistent covariance matrix estimation with estimation-type "HC3" (i.e. `sandwich::vcovHC(type = "HC3")` is called): ```{r} data(iris) model <- lm(Petal.Length ~ Sepal.Length * Species + Sepal.Width, data = iris) # model parameters, where SE, CI and p-values are based on robust estimation tab_model(model, vcov.fun = "HC", show.se = TRUE) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovHC(model)))) ``` ### Cluster-Robust Covariance Matrix Estimation (sandwich) If another covariance matrix estimation is required, use the `vcov.fun`-argument. This argument needs the suffix for the related `vcov*()`-functions as value, i.e. `vcov.fun = "CL"` would call `sandwich::vcovCL()`, or `vcov.fun = "HAC"` would call `sandwich::vcovHAC()`. The specific estimation type can be changed with `vcov.type`. E.g., `sandwich::vcovCL()` accepts estimation types HC0 to HC3. In the next example, we use a clustered covariance matrix estimation with HC1-estimation type. ```{r} # change estimation-type tab_model(model, vcov.fun = "CL", vcov.type = "HC1", show.se = TRUE) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovCL(model)))) ``` Usually, clustered covariance matrix estimation is used when there is a cluster-structure in the data. The variable indicating the cluster-structure can be defined in `sandwich::vcovCL()` with the `cluster`-argument. In `tab_model()`, additional arguments that should be passed down to functions from the **sandwich** package can be specified in `vcov.args`: ```{r} iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris))) # change estimation-type, defining additional arguments tab_model( model, vcov.fun = "CL", vcov.type = "HC1", vcov.args = list(cluster = iris$cluster), show.se = TRUE ) # compare standard errors to result from sandwich-package unname(sqrt(diag(sandwich::vcovCL(model, cluster = iris$cluster)))) ``` ### Cluster-Robust Covariance Matrix Estimation (clubSandwich) Cluster-robust estimation of the variance-covariance matrix can also be achieved using `clubSandwich::vcovCR()`. 
Thus, when `vcov.fun = "CR"`, the related function from the **clubSandwich** package is called. Note that this function _requires_ the specification of the `cluster`-argument. ```{r} # create fake-cluster-variable, to demonstrate cluster robust standard errors iris$cluster <- factor(rep(LETTERS[1:8], length.out = nrow(iris))) # cluster-robust estimation tab_model( model, vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$cluster), show.se = TRUE ) # compare standard errors to result from clubSsandwich-package unname(sqrt(diag(clubSandwich::vcovCR(model, type = "CR1", cluster = iris$cluster)))) ``` ### Robust Covariance Matrix Estimation on Standardized Model Parameters Finally, robust estimation can be combined with standardization. However, robust covariance matrix estimation only works for `show.std = "std"`. ```{r} # model parameters, robust estimation on standardized model tab_model( model, show.std = "std", vcov.fun = "HC" ) ``` ## Mixed Models ### Robust Covariance Matrix Estimation for Mixed Models For linear mixed models, that by definition have a clustered ("hierarchical" or multilevel) structure in the data, it is also possible to estimate a cluster-robust covariance matrix. This is possible due to the **clubSandwich** package, thus we need to define the same arguments as in the above example. ```{r} library(lme4) data(iris) set.seed(1234) iris$grp <- as.factor(sample(1:3, nrow(iris), replace = TRUE)) # fit example model model <- lme4::lmer( Sepal.Length ~ Species * Sepal.Width + Petal.Length + (1 | grp), data = iris ) # normal model parameters, like from 'summary()' tab_model(model) # model parameters, cluster robust estimation for mixed models tab_model( model, vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$grp) ) ``` ### Robust Covariance Matrix Estimation on Standardized Mixed Model Parameters Again, robust estimation can be combined with standardization for linear mixed models as well, which in such cases also only works for `show.std = "std"`. 
```{r} # model parameters, cluster robust estimation on standardized mixed model tab_model( model, show.std = "std", vcov.fun = "CR", vcov.type = "CR1", vcov.args = list(cluster = iris$grp) ) ``` sjPlot/inst/doc/plot_interactions.R0000644000176200001440000001707114150131412017130 0ustar liggesusers## ----set-options, echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 3.5, message = FALSE, warning = FALSE) options(width = 800, tibble.width = Inf) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(sjPlot) library(sjmisc) library(ggplot2) data(efc) theme_set(theme_sjplot()) # make categorical efc$c161sex <- to_factor(efc$c161sex) # fit model with interaction fit <- lm(neg_c_7 ~ c12hour + barthtot * c161sex, data = efc) plot_model(fit, type = "pred", terms = c("barthtot", "c161sex")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "int") ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "pred", terms = c("c161sex", "barthtot [0, 100]")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # fit model with interaction, switching terms in formula fit <- lm(neg_c_7 ~ c12hour + c161sex * barthtot, data = efc) plot_model(fit, type = "int") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "int", mdrt.values = "meansd") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # fit model with 3-way-interaction fit <- lm(neg_c_7 ~ c12hour * barthtot * 
c161sex, data = efc) # select only levels 30, 50 and 70 from continuous variable Barthel-Index plot_model(fit, type = "pred", terms = c("c12hour", "barthtot [30,50,70]", "c161sex")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "int") sjPlot/inst/doc/tab_bayes.html0000644000176200001440000013167314150131514016074 0ustar liggesusers Summary of Bayesian Models as HTML Table

Summary of Bayesian Models as HTML Table

Daniel Lüdecke

2021-11-26

This vignette shows examples for using tab_model() to create HTML tables for Bayesian models. Basically, tab_model() behaves in a very similar way for Bayesian models as for other, simple regression models, as shown in this vignette.

# load required packages
library(sjPlot)
library(insight)
library(httr)
library(brms)

# load sample models

# zinb <- read.csv("http://stats.idre.ucla.edu/stat/data/fish.csv")
# set.seed(123)
# m1 <- brm(bf(
#     count ~ persons + child + camper + (1 | persons),
#     zi ~ child + camper + (1 | persons)
#   ),
#   data = zinb,
#   family = zero_inflated_poisson()
# )
m1 <- insight::download_model("brms_zi_2")

# data(epilepsy)
# set.seed(123)
# epilepsy$visit <- as.numeric(epilepsy$visit)
# epilepsy$Base2 <- sample(epilepsy$Base, nrow(epilepsy), replace = TRUE)
# f1 <- bf(Base ~ zAge + count + (1 |ID| patient))
# f2 <- bf(Base2 ~ zAge + Trt + (1 |ID| patient))
# m2 <- brm(f1 + f2 + set_rescor(FALSE), data = epilepsy)
m2 <- insight::download_model("brms_mv_3")

Bayesian model summaries as HTML table

For Bayesian regression models, the table output of tab_model() differs in some respects from that for simple models or mixed models: Highest Density Intervals are used instead of confidence intervals, Bayes-R-squared values are reported, and a different “point estimate” is shown (by default, the median of the posterior draws).

tab_model(m1)
  count
Predictors Incidence Rate Ratios CI (95%)
Count Model
Intercept 0.42 0.22 – 0.88
persons 2.32 1.86 – 2.93
child 0.32 0.26 – 0.38
camper 2.08 1.73 – 2.53
Zero-Inflated Model
Intercept 0.52 0.11 – 2.21
child 6.44 3.46 – 12.95
camper 0.43 0.21 – 0.87
Random Effects
σ2 5.29
τ00 33.86
ICC 0.14
N persons 4
Observations 250
Marginal R2 / Conditional R2 0.186 / 0.248

Multivariate response models

For multivariate response models, like mediator-analysis-models, it is recommended to print just one model in the table, as each regression is displayed as its own “model” in the output.

tab_model(m2)
  Base Base2
Predictors Estimates CI (95%) Estimates CI (95%)
Intercept 28.61 11.35 – 34.20 26.61 11.24 – 29.03
z Age -4.85 -5.42 – -1.76 1.21 -0.31 – 2.15
count 0.00 -0.00 – 0.00
Trt: Trt 1 -0.32 -4.36 – 1.43
Random Effects
σ2 54.02
τ00 4.19
ICC 0.96
N patient 59
Observations 236

Show two Credible Interval columns

To show a second CI-column, use show.ci50 = TRUE.

tab_model(m2, show.ci50 = TRUE)
  Base Base2
Predictors Estimates CI (50%) CI (95%) Estimates CI (50%) CI (95%)
Intercept 28.61 24.07 – 30.23 11.35 – 34.20 26.61 21.53 – 28.45 11.24 – 29.03
z Age -4.85 -5.17 – -3.89 -5.42 – -1.76 1.21 0.74 – 1.54 -0.31 – 2.15
count 0.00 -0.00 – 0.00 -0.00 – 0.00
Trt: Trt 1 -0.32 -1.91 – 0.69 -4.36 – 1.43
Random Effects
σ2 56.30
τ00 4.50
ICC 0.96
N patient 59
Observations 236

Mixing multivariate and univariate response models

When both multivariate and univariate response models are displayed in one table, a column Response is added for the multivariate response model, to indicate the different outcomes.

tab_model(m1, m2)
  count Base,Base 2
Predictors Incidence Rate Ratios CI (95%) Estimates CI (95%) Response
Intercept 0.42 0.22 – 0.88 28.61 11.35 – 34.20 Base
Intercept 0.42 0.22 – 0.88 26.61 11.24 – 29.03 Base2
persons 2.32 1.86 – 2.93
child 0.32 0.26 – 0.38
camper 2.08 1.73 – 2.53
z Age -4.85 -5.42 – -1.76 Base
count 0.00 -0.00 – 0.00 Base
z Age 1.21 -0.31 – 2.15 Base2
Trt: Trt 1 -0.32 -4.36 – 1.43 Base2
Zero-Inflated Model
Intercept 0.52 0.11 – 2.21
child 6.44 3.46 – 12.95
camper 0.43 0.21 – 0.87
Random Effects
σ2 5.32 56.97
τ00 33.80 4.11
ICC 0.14 0.96
N 4 persons 59 patient
Observations 250 236
Marginal R2 / Conditional R2 0.186 / 0.248 NA
sjPlot/inst/doc/plot_likert_scales.html0000644000176200001440000070405214150131421020017 0ustar liggesusers Plotting Likert Scales

Plotting Likert Scales

Daniel Lüdecke

2021-11-26

library(dplyr)
library(sjPlot)
library(sjmisc)
library(parameters)
data(efc)
# find all variables from COPE-Index, which all have a "cop" in their
# variable name, and then plot that subset as likert-plot
mydf <- find_var(efc, pattern = "cop", out = "df")
plot_likert(mydf)

plot_likert(
  mydf,
  grid.range = c(1.2, 1.4),
  expand.grid = FALSE,
  values = "sum.outside",
  show.prc.sign = TRUE
)

# Plot in groups
plot_likert(mydf, groups = c(2, 1, 1, 1, 1, 2, 2, 2, 1))

pca <- parameters::principal_components(mydf)
groups <- parameters::closest_component(pca)
plot_likert(mydf, groups = groups)

plot_likert(
  mydf,
  c(rep("B", 4), rep("A", 5)),
  sort.groups = FALSE,
  grid.range = c(0.9, 1.1),
  geom.colors = "RdBu",
  rel_heights = c(6, 8),
  wrap.labels = 40,
  reverse.scale = TRUE
)

# control legend items
six_cat_example = data.frame(
  matrix(sample(1:6, 600, replace = TRUE), ncol = 6)
)

six_cat_example <-
  six_cat_example %>%
  dplyr::mutate_all( ~ ordered(., labels = c("+++", "++", "+", "-", "--", "---")))

# Old default
plot_likert(
  six_cat_example,
  groups = c(1, 1, 1, 2, 2, 2),
  group.legend.options = list(nrow = 2, byrow = FALSE)
)


# New default
plot_likert(six_cat_example, groups = c(1, 1, 1, 2, 2, 2))

sjPlot/inst/doc/table_css.R0000644000176200001440000001405314150131570015331 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(sjPlot) data(efc) m <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) tab <- tab_model(m) ## ----echo = TRUE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- cat(tab$page.style) ## ----echo = TRUE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- cat(tab$page.content) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model( m, CSS = list( css.depvarhead = 'color: red;', css.centeralign = 'text-align: left;', css.firsttablecol = 'font-weight: bold;', css.summary = 'color: blue;' ) ) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m, CSS = list(css.depvarhead = '+color: red;')) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m, CSS = css_theme("cells")) sjPlot/inst/doc/custplot.R0000644000176200001440000001126614150131403015245 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", fig.width = 7, fig.height = 5, warning = FALSE, message = FALSE) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("haven", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------- # load libraries library(sjPlot) # for plotting library(sjmisc) # for sample data library(ggplot2) # to access ggplot-themes # load sample data set data(efc) set_theme( geom.outline.color = "antiquewhite4", geom.outline.size = 1, geom.label.size = 2, geom.label.color = 
"grey50", title.color = "red", title.size = 1.5, axis.angle.x = 45, axis.textcolor = "blue", base = theme_bw() ) plot_grpfrq( efc$e42dep, efc$e16sex, title = NULL, geom.colors = c("cadetblue", "coral"), geom.size = 0.4 ) ## ----------------------------------------------------------------------------- # blank theme set_theme( base = theme_blank(), axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) plot_grpfrq( efc$e42dep, efc$e15relat, geom.colors = "PuRd", show.values = FALSE ) ## ---- eval=FALSE-------------------------------------------------------------- # library(RColorBrewer) # display.brewer.all() ## ----------------------------------------------------------------------------- set_theme(geom.label.color = "white", geom.label.size = 3) # labels appear very large due to export metrics plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE) ## ----results='hide', echo=FALSE----------------------------------------------- set_theme( axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) ## ----------------------------------------------------------------------------- plot_grpfrq(efc$e42dep, efc$e16sex, expand.grid = TRUE) ## ----------------------------------------------------------------------------- set_theme(base = theme_light()) plot_frq(efc$e42dep) ## ----------------------------------------------------------------------------- library(sjmisc) data(efc) efc <- to_factor(efc, e42dep, c172code) m <- lm(neg_c_7 ~ pos_v_4 + c12hour + e42dep + c172code, data = efc) # reset theme set_theme(base = theme_grey()) # forest plot of regression model p <- plot_model(m) # default theme p # pre-defined theme p + theme_sjplot() ## ----------------------------------------------------------------------------- p + theme_sjplot2() + scale_color_sjplot("simply") ## ----------------------------------------------------------------------------- show_sjplot_pals() ## ----------------------------------------------------------------------------- set_theme(base = theme_bw(), axis.linecolor = "darkgreen") plot_frq(efc$e42dep) ## ----------------------------------------------------------------------------- set_theme( base = theme_classic(), axis.tickslen = 0, # hides tick marks axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3.5 ) plot_grpfrq( efc$e42dep, efc$e16sex, coord.flip = TRUE, show.axis.values = FALSE ) + theme(axis.line.x = element_line(color = "white")) ## ----------------------------------------------------------------------------- set_theme( base = theme_classic(), legend.title.face = "italic", # title font face legend.inside = TRUE, # legend inside plot legend.color = "grey50", # legend label color legend.pos = "bottom right", # legend position inside plot axis.title.size = .9, axis.textsize = .9, legend.size = .7, legend.title.size = .8, geom.label.size = 3 ) plot_grpfrq(efc$e42dep, efc$e16sex, coord.flip = TRUE) ## ----------------------------------------------------------------------------- set_theme( base = theme_classic(), axis.linecolor = "white", # "remove" axis lines axis.textcolor.y = "darkred", # set axis label text only for y axis axis.tickslen = 0, # "remove" tick marks legend.title.color = "red", # legend title color legend.title.size = 2, # legend title size legend.color = "green", # legend label color legend.pos = "top", # legend position above plot axis.title.size = .9, axis.textsize = .9, legend.size = .7, geom.label.size = 3 ) 
plot_grpfrq(efc$e42dep, efc$e16sex) sjPlot/inst/doc/table_css.Rmd0000644000176200001440000000753413662304072015666 0ustar liggesusers--- title: "Customizing HTML tables" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Customizing HTML tables} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE) ``` All `tab_*`-functions create a HTML page with the table output. This table, by default, is opened in the viewer pane of your IDE (in case you’re using an IDE that also supports the viewer pane). If a viewer pane is not available, the created HTML output is saved as temporary file and opened in your default web browser. The temporary files are deleted after your R session ends. ## Copying table output to office or word processors ### Export table as HTML file to open in word processors You can save the HTML page as file for further usage by specifying the `file`-argument The saved HTML file can be opened by word processors like LibreOffice or Microsoft Office. ### Drag and drop from browser or RStudio viewer pane You can directly drag and drop a table from the RStudio viewer pane or browser into your word processor. Simply select the complete table with your mouse and drag it into office. ## Customizing table output with the CSS parameter The table output is in in HTML format. The table style (visual appearance) is formatted using _Cascading Style Sheets_ (CSS). If you are a bit familiar with these topics, you can easily customize the appearance of the table output. Many table elements (header, row, column, cell, summary row, first row or column...) have CSS-class attributes, which can be used to change the table style. Since each `sjt.*` function as well as `tab_model()` has different table elements and thus different class attributes, you first need to know which styles can be customized. ### Retrieving customizable styles The table functions invisibly return several values. The return value `page.style` contains the style information for the HTML table. You can print this style sheet to console using the `cat()`-function: ```{r} library(sjPlot) data(efc) m <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) tab <- tab_model(m) ``` ```{r echo = TRUE} cat(tab$page.style) ``` The HTML code is in the `page.content` return value. The following code prints the HTML code of the table to the R console: ```{r echo = TRUE} cat(tab$page.content) ``` Now you can see which table elements are associated with which CSS class attributes. ## Customizing table output with the CSS parameter You can customize the table output with the `CSS` parameter. This parameter requires a list of attributes, which follow a certain pattern: 1) each attributes needs a `css.` prefix 2) followed by the class name (e.g. `caption`, `thead`, `centeralign`, etc.) 3) equal-sign 4) the CSS format (in (single) quotation marks) 5) the CSS format must end with a colon (;) Example: ```{r} tab_model( m, CSS = list( css.depvarhead = 'color: red;', css.centeralign = 'text-align: left;', css.firsttablecol = 'font-weight: bold;', css.summary = 'color: blue;' ) ) ``` In the above example, the header row lost the original style and just became red. If you want to keep the original style and just add additional style information, use the plus-sign (+) as initial character for the parameter attributes. 
In the following example, the header row keeps its original style and is additionally printed in red: ```{r} tab_model(m, CSS = list(css.depvarhead = '+color: red;')) ``` ## Pre-defined Table-Layouts There are a few pre-defined CSS-themes, which can be accessed with the `css_theme()`-function. There are more pre-defined themes planned for the future. ```{r} tab_model(m, CSS = css_theme("cells")) ``` sjPlot/inst/doc/tab_bayes.Rmd0000644000176200001440000000564513612122336015655 0ustar liggesusers--- title: "Summary of Bayesian Models as HTML Table" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette params: EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true") --- ```{r echo = FALSE} knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE ) if (!requireNamespace("insight", quietly = TRUE) || !requireNamespace("httr", quietly = TRUE) || !requireNamespace("brms", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` This vignette shows examples for using `tab_model()` to create HTML tables for mixed models. Basically, `tab_model()` behaves in a very similar way for mixed models as for other, simple regression models, as shown [in this vignette](tab_model_estimates.html). ```{r, results='hide', message=FALSE, warning=FALSE} # load required packages library(sjPlot) library(insight) library(httr) library(brms) # load sample models # zinb <- read.csv("http://stats.idre.ucla.edu/stat/data/fish.csv") # set.seed(123) # m1 <- brm(bf( # count ~ persons + child + camper + (1 | persons), # zi ~ child + camper + (1 | persons) # ), # data = zinb, # family = zero_inflated_poisson() # ) m1 <- insight::download_model("brms_zi_2") # data(epilepsy) # set.seed(123) # epilepsy$visit <- as.numeric(epilepsy$visit) # epilepsy$Base2 <- sample(epilepsy$Base, nrow(epilepsy), replace = TRUE) # f1 <- bf(Base ~ zAge + count + (1 |ID| patient)) # f2 <- bf(Base2 ~ zAge + Trt + (1 |ID| patient)) # m2 <- brm(f1 + f2 + set_rescor(FALSE), data = epilepsy) m2 <- insight::download_model("brms_mv_3") ``` ## Bayesian models summaries as HTML table For Bayesian regression models, some of the differences to the table output from [simple models](tab_model_estimates.html) or [mixed models](tab_mixed.html) of `tab_models()` are the use of _Highest Density Intervals_ instead of confidence intervals, the Bayes-R-squared values, and a different "point estimate" (which is, by default, the median from the posterior draws). ```{r} tab_model(m1) ``` ## Multivariate response models For multivariate response models, like mediator-analysis-models, it is recommended to print just one model in the table, as each regression is displayed as own "model" in the output. ```{r} tab_model(m2) ``` ## Show two Credible Interval-column To show a second CI-column, use `show.ci50 = TRUE`. ```{r} tab_model(m2, show.ci50 = TRUE) ``` ## Mixing multivariate and univariate response models When both multivariate and univariate response models are displayed in one table, a column _Response_ is added for the multivariate response model, to indicate the different outcomes. ```{r} tab_model(m1, m2) ``` sjPlot/inst/doc/plot_marginal_effects.html0000644000176200001440000036720614150131427020500 0ustar liggesusers Plotting Marginal Effects of Regression Models

Plotting Marginal Effects of Regression Models

Daniel Lüdecke

2021-11-26

This document describes how to plot marginal effects of various regression models, using the plot_model() function. plot_model() is a generic plot-function, which accepts many model-objects, like lm, glm, lme, lmerMod etc.

plot_model() allows creating various plot types, which can be defined via the type-argument. The default is type = "fe", which means that fixed effects (model coefficients) are plotted. To plot marginal effects, call plot_model() with one of the following type-options (a short sketch of these calls follows the list below):

  • type = "pred" to plot predicted values (marginal effects) for specific model terms.
  • type = "eff", which is similar to type = "pred", however, discrete predictors are held constant at their proportions (not reference level). It internally calls via .
  • type = "emm", which is similar to type = "eff". It internally calls via .
  • type = "int" to plot marginal effects of interaction terms.

To plot marginal effects of regression models, at least one model term needs to be specified for which the effects are computed. It is also possible to compute marginal effects for model terms, grouped by the levels of another model predictor. The function also allows plotting marginal effects for two- or three-way interactions; however, this is shown in a different vignette.

plot_model() supports labelled data and automatically uses variable and value labels to annotate the plot. This works with most regression modelling functions.

Note: For marginal effects plots, sjPlot calls functions from the ggeffects-package. If you need more flexibility when creating marginal effects plots, consider directly using the ggeffects-package.

Marginal effects

plot_model(type = "pred") computes predicted values for all possible levels and values from a model’s predictors. In the simplest case, a fitted model is passed as first argument, followed by the type argument and the term in question as terms argument:

library(sjPlot)
library(ggplot2)
data(efc)
theme_set(theme_sjplot())

fit <- lm(barthtot ~ c12hour + neg_c_7 + c161sex + c172code, data = efc)

plot_model(fit, type = "pred", terms = "c12hour")

The plot shows the predicted values for the response at each value from the term c12hour.

Marginal effects for different groups

The terms-argument accepts up to three model terms, where the second and third term indicate grouping levels. This allows predictions for the term in question at different levels for other model terms:

plot_model(fit, type = "pred", terms = c("c12hour", "c172code"))

A second grouping structure can be defined, which will create a plot with multiple panels in grid layout:

plot_model(fit, type = "pred", terms = c("c12hour", "c172code", "c161sex"))

Marginal effects at specific values or levels

The terms-argument not only defines the model terms of interest, but each model term that defines the grouping structure can be limited to certain values. This allows to compute and plot marginal effects for terms at specific values only. To define these values, put them in square brackets directly after the term name: terms = c("c12hour [30, 50, 80]", "c172code [1,3]")

plot_model(fit, type = "pred", terms = c("c12hour [30, 50, 80]", "c172code [1,3]"))

Note that in the above plot, although only the values 30, 50 and 80 are selected from c12hour, the continuous scale automatically adds panel grids every 5 units along the x-axis.

Defining your own values is especially useful when variables are, for instance, log-transformed. plot_model() then typically only uses the range of the log-transformed variable, which is in most cases not what we want. In such situations, specify the range in the terms-argument.

data(mtcars)
mpg_model <- lm(mpg ~ log(hp), data = mtcars)

# x-values and predictions based on the log(hp)-values
plot_model(mpg_model, type = "pred", terms = "hp")


# x-values and predictions based on hp-values from 50 to 150
plot_model(mpg_model, type = "pred", terms = "hp [50:150]")

The brackets in the terms-argument also accept the name of a valid function, to (back-)transform predicted values. In this example, an alternative would be to specify that values should be exponentiated, which is indicated by [exp] in the terms-argument:

# x-values and predictions based on exponentiated hp-values
plot_model(mpg_model, type = "pred", terms = "hp [exp]")

Polynomial terms and splines

The function also works for models with polynomial terms or splines. The following code reproduces the plot from ?splines::bs:

library(splines)
data(women)

fm1 <- lm(weight ~ bs(height, df = 5), data = women)
plot_model(fm1, type = "pred", terms = "height")

Different constant values for factors

Model predictions are based on all possible combinations of the model terms, which are - roughly speaking - created using expand.grid(). For the terms in question, all values are used for combinations. All other model predictors that are not specified in the terms-argument are held constant (which is achieved with sjstats::typical_value()). By default, continuous variables are set to their mean, while factors are set to their reference level.

data(efc)
efc$c172code <- sjlabelled::as_factor(efc$c172code)
fit <- lm(neg_c_7 ~ c12hour + c172code, data = efc)

# reference category is used for "c172code", i.e. c172code
# used the first level as value for predictions
plot_model(fit, type = "pred", terms = "c12hour")

However, one may want to set factors to their proportions instead of the reference level. E.g., a factor gender with value 0 for female and value 1 for male persons would be set to 0 when marginal effects are computed with type = "pred". But if 40% of the sample are female persons, another possibility to hold this factor constant is to use the value .4 (reflecting the proportion of 40%). If this is required, use type = "eff", which internally does not call predict() to compute marginal effects, but rather effects::effect().

# proportion is used for "c172code", i.e. it is set to
# mean(sjlabelled::as_numeric(efc$c172code), na.rm = T),
# which is about 1.9715
plot_model(fit, type = "eff", terms = "c12hour")

Interaction terms

Plotting interaction terms is described in a separate vignette.

sjPlot/inst/doc/plot_marginal_effects.R0000644000176200001440000002352614150131427017727 0ustar liggesusersparams <- list(EVAL = TRUE) ## ----set-options, echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------ knitr::opts_chunk$set( collapse = TRUE, comment = "#>", message = FALSE, dev = "png", fig.width = 7, fig.height = 3.5, warning = FALSE, eval = TRUE # eval = if (isTRUE(exists("params"))) params$EVAL else FALSE ) options(width = 800, tibble.width = Inf) if (!requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("splines", quietly = TRUE) || !requireNamespace("ggplot2", quietly = TRUE) || !requireNamespace("sjlabelled", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(sjPlot) library(ggplot2) data(efc) theme_set(theme_sjplot()) fit <- lm(barthtot ~ c12hour + neg_c_7 + c161sex + c172code, data = efc) plot_model(fit, type = "pred", terms = "c12hour") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "pred", terms = c("c12hour", "c172code")) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "pred", terms = c("c12hour", "c172code", "c161sex")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- plot_model(fit, type = "pred", terms = c("c12hour [30, 50, 80]", "c172code [1,3]")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- data(mtcars) mpg_model <- lm(mpg ~ log(hp), data = mtcars) # x-values and predictions based on the log(hp)-values plot_model(mpg_model, type = "pred", terms = "hp") # x-values and predictions based on hp-values from 50 to 150 plot_model(mpg_model, type = "pred", terms = "hp [50:150]") ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # x-values and predictions based on exponentiated hp-values plot_model(mpg_model, type = "pred", terms = "hp [exp]") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(splines) data(women) fm1 <- lm(weight ~ bs(height, df = 5), data = women) plot_model(fm1, type = "pred", terms = "height") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- data(efc) efc$c172code <- sjlabelled::as_factor(efc$c172code) fit <- lm(neg_c_7 ~ c12hour + c172code, data = efc) # reference category is used for "c172code", i.e. 
c172code # used the first level as value for predictions plot_model(fit, type = "pred", terms = "c12hour") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # proportion is used for "c172code", i.e. it is set to # mean(sjlabelled::as_numeric(efc$c172code), na.rm = T), # which is about 1.9715 plot_model(fit, type = "eff", terms = "c12hour") sjPlot/inst/doc/sjtitemanalysis.html0000644000176200001440000014650314150131451017364 0ustar liggesusers Item Analysis of a Scale or an Index

Item Analysis of a Scale or an Index

Daniel Lüdecke

2021-11-26

This document shows examples for using the tab_itemscale() function of the sjPlot package.

Performing an item analysis of a scale or index

This function performs an item analysis with certain statistics that are useful for scale or index development. The following statistics are computed for each variable (column) of a data frame:

  • percentage of missing values
  • mean value
  • standard deviation
  • skew
  • item difficulty
  • item discrimination
  • Cronbach’s Alpha if item was removed from scale
  • mean (or average) inter-item-correlation

Optionally, the following statistics can be computed as well:

  • kurtosis
  • Shapiro-Wilk Normality Test

If the argument factor.groups is not NULL, the data frame df will be split into groups, assuming that factor.groups indicates those columns (variables) of the data frame that belong to a certain factor (see, for instance, the return value of tab_pca() or parameters::principal_components() as examples for retrieving factor groups for a scale). This is useful when you have performed a principal component analysis or factor analysis as a first step, and now want to see whether the extracted factors/components represent a scale or index score.

To demonstrate this function, we first need some data:
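The data-preparation chunk is not shown in the rendered output; the following is a minimal sketch of it, mirroring the vignette's R source, which selects the COPE-index items (variable names containing "cop") from the efc data:

library(sjPlot)
library(sjmisc)
library(dplyr)

data(efc)
# create data frame with the COPE-index items
mydf <- dplyr::select(efc, contains("cop"))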

Index score with one component

The simplest function call is just passing the data frame as an argument. In this case, the function assumes that all variables of the data frame belong to one factor only.

tab_itemscale(mydf)
Component 1
Missings Mean SD Skew Item Difficulty Item Discrimination α if deleted
0.77 % 3.12 0.58 -0.12 0.78 -0.24 0.54
0.66 % 2.02 0.72 0.65 0.51 0.33 0.38
0.66 % 1.63 0.87 1.31 0.41 0.41 0.34
1.10 % 1.77 0.87 1.06 0.44 0.44 0.32
0.66 % 1.39 0.67 1.77 0.35 0.36 0.38
0.88 % 1.29 0.64 2.43 0.32 0.42 0.37
0.88 % 1.92 0.91 0.83 0.48 0.37 0.35
0.77 % 2.16 1.04 0.32 0.54 -0.03 0.53
2.20 % 2.93 0.96 -0.45 0.73 -0.11 0.56
Mean inter-item-correlation=0.092 · Cronbach’s α=0.459

To interpret the output, we may consider the following values as rules of thumb for indicating a reliable scale:

  • item difficulty should range between 0.2 and 0.8. The ideal value is p + (1 - p) / 2, which mostly lies between 0.5 and 0.8 (see the sketch after this list)
  • for item discrimination, acceptable values are 0.2 or higher; the closer to 1 the better
  • in case the total Cronbach’s Alpha value is below the acceptable cut-off of 0.7 (mostly if an index has few items), the mean inter-item-correlation is an alternative measure to indicate acceptability; satisfactory range lies between 0.2 and 0.4
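As a rough illustration of the first rule of thumb, item difficulty can be approximated by hand as the item mean divided by the maximum observed item value. This is only a hedged sketch; tab_itemscale() may use a slightly different internal definition:

# approximate item difficulty as item mean / maximum observed item value
item_difficulty_approx <- function(x) {
  mean(x, na.rm = TRUE) / max(x, na.rm = TRUE)
}
sapply(mydf, item_difficulty_approx)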

Index score with more than one component

The items of the COPE index used for our example do not represent a single factor. We can check this, for instance, with a principal component analysis. If you know which variable belongs to which factor (i.e. which variable is part of which component), you can pass a numeric vector with these group indices to the argument factor.groups. In this case, the data frame is divided into the components specified by factor.groups, and each component (or factor) is analysed.

library(parameters)
# Compute PCA on Cope-Index, and retrieve 
# factor indices for each COPE index variable
pca <- parameters::principal_components(mydf)
factor.groups <- parameters::closest_component(pca)

The PCA extracted two components. Now tab_itemscale()

  1. performs an item analysis on both components, showing whether each of them is a reliable and useful scale or index score
  2. builds an index of each component, by standardizing each scale
  3. and adds a component-correlation-matrix, to see whether the index scores (which are based on the components) are highly correlated or not.
tab_itemscale(mydf, factor.groups)
#> Warning: Data frame needs at least three columns for reliability-test.
Component 1
Missings Mean SD Skew Item Difficulty Item Discrimination α if deleted
0.77 % 3.12 0.58 -0.12 0.78 -0.37 0.78
0.66 % 2.02 0.72 0.65 0.51 0.49 0.61
0.66 % 1.63 0.87 1.31 0.41 0.55 0.59
1.10 % 1.77 0.87 1.06 0.44 0.54 0.59
0.66 % 1.39 0.67 1.77 0.35 0.44 0.63
0.88 % 1.29 0.64 2.43 0.32 0.47 0.62
0.88 % 1.92 0.91 0.83 0.48 0.57 0.58
Mean inter-item-correlation=0.196 · Cronbach’s α=0.676

 

Component 2
Missings Mean SD Skew Item Difficulty Item Discrimination α if deleted
0.77 % 2.16 1.04 0.32 0.54 NA NA
2.20 % 2.93 0.96 -0.45 0.73 NA NA
Mean inter-item-correlation=0.260 · Cronbach’s α=0.412

 

  Component 1 Component 2
Component 1 α=0.676  
Component 2 -0.196
(<.001)
α=0.412
Computed correlation used pearson-method with listwise-deletion.

Adding further statistics

tab_itemscale(mydf, factor.groups, show.shapiro = TRUE, show.kurtosis = TRUE)
#> Warning: Data frame needs at least three columns for reliability-test.
Component 1
Missings Mean SD Skew Kurtosis W(p) Item Difficulty Item Discrimination α if deleted
0.77 % 3.12 0.58 -0.12 0.27 0.75 (0.000) 0.78 -0.37 0.78
0.66 % 2.02 0.72 0.65 0.73 0.80 (0.000) 0.51 0.49 0.61
0.66 % 1.63 0.87 1.31 0.86 0.72 (0.000) 0.41 0.55 0.59
1.10 % 1.77 0.87 1.06 0.48 0.78 (0.000) 0.44 0.54 0.59
0.66 % 1.39 0.67 1.77 2.87 0.62 (0.000) 0.35 0.44 0.63
0.88 % 1.29 0.64 2.43 5.77 0.51 (0.000) 0.32 0.47 0.62
0.88 % 1.92 0.91 0.83 -0.08 0.81 (0.000) 0.48 0.57 0.58
Mean inter-item-correlation=0.196 · Cronbach’s α=0.676

 

Component 2
Missings Mean SD Skew Kurtosis W(p) Item Difficulty Item Discrimination α if deleted
0.77 % 2.16 1.04 0.32 -1.14 0.85 (0.000) 0.54 NA NA
2.20 % 2.93 0.96 -0.45 -0.83 0.85 (0.000) 0.73 NA NA
Mean inter-item-correlation=0.260 · Cronbach’s α=0.412

 

  Component 1 Component 2
Component 1 α=0.676  
Component 2 -0.196
(<.001)
α=0.412
Computed correlation used pearson-method with listwise-deletion.
sjPlot/inst/doc/tab_model_estimates.R0000644000176200001440000005756114150131555017414 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>", warning = FALSE, message = FALSE) if (!requireNamespace("sjlabelled", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("lme4", quietly = TRUE) || !requireNamespace("pscl", quietly = TRUE) || !requireNamespace("glmmTMB", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) library(sjPlot) } ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # load package library(sjPlot) library(sjmisc) library(sjlabelled) # sample data data("efc") efc <- as_factor(efc, c161sex, c172code) ## ---- results='hide'---------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- m1 <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc) m2 <- lm(neg_c_7 ~ c160age + c12hour + c161sex + e17age, data = efc) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- data(mtcars) m.mtcars <- lm(mpg ~ cyl + hp + wt, data = mtcars) tab_model(m.mtcars) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- set.seed(2) dat <- data.frame( y = runif(100, 0, 100), drug = as.factor(sample(c("nonsense", "useful", "placebo"), 100, TRUE)), group = as.factor(sample(c("control", "treatment"), 100, TRUE)) ) pretty_names <- lm(y ~ drug * group, data = dat) tab_model(pretty_names) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, auto.label = FALSE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(pretty_names, auto.label = FALSE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, m2) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- m3 <- glm( tot_sc_e ~ c160age + c12hour + c161sex + c172code, data = efc, family = poisson(link = "log") ) efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1) m4 <- glm( neg_c_7d ~ c161sex + barthtot + c172code, data = efc, family 
= binomial(link = "logit") ) tab_model(m3, m4) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m3, m4, transform = NULL, auto.label = FALSE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(pscl) data("bioChemists") m5 <- zeroinfl(art ~ fem + mar + kid5 + ment | kid5 + phd + ment, data = bioChemists) tab_model(m5) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, m3, m5, auto.label = FALSE, show.ci = FALSE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, show.se = TRUE, show.std = TRUE, 
show.stat = TRUE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m3, m4, show.ci = FALSE, show.p = FALSE, auto.label = FALSE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model( m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE, col.order = c("p", "stat", "est", "std.se", "se", "std.est") ) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, collapse.ci = TRUE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model( m1, m2, pred.labels = c("Intercept", "Age (Carer)", "Hours per Week", "Gender (Carer)", 
"Education: middle (Carer)", "Education: high (Carer)", "Age (Older Person)"), dv.labels = c("First Model", "M2"), string.pred = "Coeffcient", string.ci = "Conf. Int (95%)", string.p = "P-Value" ) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(glmmTMB) data("Salamanders") model <- glm( count ~ spp + Wtemp + mined + cover, family = poisson(), data = Salamanders ) tab_model(model) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(model, show.reflvl = TRUE) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(model, show.reflvl = TRUE, prefix.labels = "varname") ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, m2, p.style = "stars") ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, m2, p.style = "scientific", digits.p = 2) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- # example, coefficients are "c161sex2" or "c172code3" summary(m1) pl <- c( `(Intercept)` = "Intercept", e17age = "Age (Older Person)", c160age = "Age (Carer)", c12hour = "Hours per Week", barthtot = "Barthel-Index", c161sex2 = "Gender (Carer)", c172code2 = "Education: middle (Carer)", c172code3 = "Education: high (Carer)", a_non_used_label = "We don't care" ) tab_model( m1, m2, m3, m4, pred.labels = pl, dv.labels = c("Model1", "Model2", "Model3", "Model4"), show.ci = FALSE, show.p = FALSE, transform = NULL ) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, terms = c("c160age", "c12hour")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_model(m1, rm.terms = c("c172code2", "c161sex2")) sjPlot/inst/doc/plot_likert_scales.Rmd0000644000176200001440000000414513662304072017604 0ustar liggesusers--- title: "Plotting Likert Scales" author: "Daniel Lüdecke" date: "`r Sys.Date()`" output: rmarkdown::html_vignette vignette: > %\VignetteIndexEntry{Plotting Likert Scales} %\VignetteEngine{knitr::rmarkdown} %\VignetteEncoding{UTF-8} --- ```{r set-options, echo = FALSE} knitr::opts_chunk$set(collapse = TRUE, comment = "#>", dev = "png", fig.width = 7, fig.height = 6, message = FALSE, warning = FALSE) options(width = 800, tibble.width = Inf) if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("parameters", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ``` ```{r fig.height = 5.5} library(dplyr) library(sjPlot) library(sjmisc) library(parameters) data(efc) # find all variables from COPE-Index, which all have a "cop" in their # variable name, and then plot that subset as likert-plot mydf <- find_var(efc, pattern = "cop", out = "df") plot_likert(mydf) ``` ```{r} plot_likert( mydf, grid.range = c(1.2, 1.4), expand.grid = FALSE, values = "sum.outside", show.prc.sign = TRUE ) ``` ```{r} # Plot in groups plot_likert(mydf, groups = c(2, 1, 1, 1, 1, 2, 2, 2, 1)) ``` ```{r fig.height = 6.5} pca <- parameters::principal_components(mydf) groups <- parameters::closest_component(pca) plot_likert(mydf, groups = groups) ``` ```{r} plot_likert( mydf, c(rep("B", 4), rep("A", 5)), sort.groups = FALSE, grid.range = c(0.9, 1.1), geom.colors = "RdBu", rel_heights = c(6, 8), wrap.labels = 40, reverse.scale = TRUE ) ``` ```{r fig.height = 5} # control legend items six_cat_example = data.frame( matrix(sample(1:6, 600, replace = TRUE), ncol = 6) ) six_cat_example <- six_cat_example %>% dplyr::mutate_all( ~ ordered(., labels = 
c("+++", "++", "+", "-", "--", "---"))) # Old default plot_likert( six_cat_example, groups = c(1, 1, 1, 2, 2, 2), group.legend.options = list(nrow = 2, byrow = FALSE) ) # New default plot_likert(six_cat_example, groups = c(1, 1, 1, 2, 2, 2)) ``` sjPlot/inst/doc/tab_mixed.html0000644000176200001440000016764014150131531016101 0ustar liggesusers Summary of Mixed Models as HTML Table

Summary of Mixed Models as HTML Table

Daniel Lüdecke

2021-11-26


This vignette shows examples for using tab_model() to create HTML tables for mixed models. Basically, tab_model() behaves in a very similar way for mixed models as for other, simple regression models, as shown in this vignette.

# load required packages
library(sjPlot)
library(lme4)
data("sleepstudy")
data("efc")
efc$cluster <- as.factor(efc$e15relat)

Mixed models summaries as HTML table

Unlike tables for non-mixed models, tab_model() adds additional information on the random effects to the table output for mixed models. You can hide this information with show.icc = FALSE and show.re.var = FALSE. Furthermore, the R-squared values are marginal and conditional R-squared statistics, based on Nakagawa et al. 2017.

m1 <- lmer(neg_c_7 ~ c160age + c161sex + e42dep + (1 | cluster), data = efc)
m2 <- lmer(Reaction ~ Days + (1 + Days | Subject), data = sleepstudy)

tab_model(m1, m2)
  Negative impact with 7
items
Reaction
Predictors Estimates CI p Estimates CI p
(Intercept) 6.55 4.86 – 8.23 <0.001 251.41 237.94 – 264.87 <0.001
carer’age -0.00 -0.03 – 0.02 0.802
carer’s gender 0.47 -0.08 – 1.02 0.094
elder’s dependency 1.45 1.19 – 1.71 <0.001
Days 10.47 7.42 – 13.52 <0.001
Random Effects
σ2 12.61 654.94
τ00 0.50 cluster 612.10 Subject
τ11   35.07 Subject.Days
ρ01   0.07 Subject
ICC 0.04 0.72
N 8 cluster 18 Subject
Observations 888 180
Marginal R2 / Conditional R2 0.127 / 0.160 0.279 / 0.799

The marginal R-squared considers only the variance of the fixed effects, while the conditional R-squared takes both the fixed and random effects into account.
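As noted above, the random-effects part of the table can be suppressed. A brief sketch using the show.re.var and show.icc arguments mentioned at the beginning of this section:

tab_model(m1, m2, show.re.var = FALSE, show.icc = FALSE)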

The p-value is a simple approximation, based on the t-statistics and using the normal distribution function. A more precise p-value can be computed using p.val = "kr". In this case, which only applies to linear mixed models, the computation of p-values is based on conditional F-tests with Kenward-Roger approximation for the degrees of freedom (using the pbkrtest-package). Note that this computation is more time consuming and therefore not used by default. You can also display the approximated degrees of freedom with show.df.

tab_model(m1, p.val = "kr", show.df = TRUE)
  Negative impact with 7
items
Predictors Estimates CI p df
(Intercept) 6.55 4.82 – 8.28 <0.001 136.86
carer’age -0.00 -0.03 – 0.02 0.810 255.72
carer’s gender 0.47 -0.08 – 1.02 0.095 881.58
elder’s dependency 1.45 1.19 – 1.71 <0.001 883.58
Random Effects
σ2 12.61
τ00 cluster 0.50
ICC 0.04
N cluster 8
Observations 888
Marginal R2 / Conditional R2 0.127 / 0.160

Generalized linear mixed models

tab_model() can also print and combine models with different link-functions.

data("efc")
efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1)
efc$cluster <- as.factor(efc$e15relat)
m3 <- glmer(
  neg_c_7d ~ c160age + c161sex + e42dep + (1 | cluster),
  data = efc, 
  family = binomial(link = "logit")
)

tab_model(m1, m3)
  Negative impact with 7
items
neg c 7 d
Predictors Estimates CI p Odds Ratios CI p
(Intercept) 6.55 4.86 – 8.23 <0.001 0.02 0.01 – 0.05 <0.001
carer’age -0.00 -0.03 – 0.02 0.802 1.01 0.99 – 1.02 0.355
carer’s gender 0.47 -0.08 – 1.02 0.094 1.83 1.30 – 2.59 0.001
elder’s dependency 1.45 1.19 – 1.71 <0.001 2.37 1.99 – 2.81 <0.001
Random Effects
σ2 12.61 3.29
τ00 0.50 cluster 0.24 cluster
ICC 0.04 0.07
N 8 cluster 8 cluster
Observations 888 888
Marginal R2 / Conditional R2 0.127 / 0.160 0.181 / 0.237

More complex models

Finally, an example from the glmmTMB-package shows how easy it is to print zero-inflated generalized linear mixed models as an HTML table.

library(glmmTMB)
data("Salamanders")
m4 <- glmmTMB(
  count ~ spp + mined + (1 | site),
  ziformula = ~ spp + mined, 
  family = truncated_poisson(link = "log"), 
  data = Salamanders
)

tab_model(m1, m3, m4, show.ci = FALSE)
  Negative impact with 7
items
neg c 7 d count
Predictors Estimates p Odds Ratios p Incidence Rate Ratios p
(Intercept) 6.55 <0.001 0.02 <0.001 0.94 0.745
carer’age -0.00 0.802 1.01 0.355
carer’s gender 0.47 0.094 1.83 0.001
elder’s dependency 1.45 <0.001 2.37 <0.001
spp: PR 0.59 0.062
spp: DM 1.25 0.121
spp: EC-A 0.82 0.331
spp: EC-L 1.91 <0.001
spp: DES-L 1.83 <0.001
spp: DF 1.05 0.765
mined: no 2.76 <0.001
Zero-Inflated Model
(Intercept) 5.79 <0.001
spp: PR 5.36 <0.001
spp: DM 0.65 0.223
spp: EC-A 3.02 0.003
spp: EC-L 0.65 0.223
spp: DES-L 0.51 0.056
spp: DF 0.65 0.223
mined: no 0.09 <0.001
Random Effects
σ2 12.61 3.29 0.10
τ00 0.50 cluster 0.24 cluster 0.05 site
ICC 0.04 0.07 0.34
N 8 cluster 8 cluster 23 site
Observations 888 888 644
Marginal R2 / Conditional R2 0.127 / 0.160 0.181 / 0.237 0.724 / 0.819

References

Nakagawa S, Johnson P, Schielzeth H (2017) The coefficient of determination R2 and intra-class correlation coefficient from generalized linear mixed-effects models revisited and expanded. J. R. Soc. Interface 14. doi: 10.1098/rsif.2017.0213

sjPlot/inst/doc/plot_model_estimates.html0000644000176200001440000050541714150131447020365 0ustar liggesusers Plotting Estimates (Fixed Effects) of Regression Models

Plotting Estimates (Fixed Effects) of Regression Models

Daniel Lüdecke

2021-11-26

This document describes how to plot estimates as forest plots (or dot whisker plots) of various regression models, using the plot_model() function. plot_model() is a generic plot-function, which accepts many model-objects, like lm, glm, lme, lmerMod etc.

plot_model() allows you to create various plot types, which can be defined via the type-argument. The default is type = "fe", which means that fixed effects (model coefficients) are plotted. For mixed effects models, only fixed effects are plotted by default as well.

library(sjPlot)
library(sjlabelled)
library(sjmisc)
library(ggplot2)

data(efc)
theme_set(theme_sjplot())

Fitting a logistic regression model

First, we fit a model that will be used in the following examples. The examples work in the same way for any other model as well.

# create binary response
y <- ifelse(efc$neg_c_7 < median(na.omit(efc$neg_c_7)), 0, 1)

# create data frame for fitting model
df <- data.frame(
  y = to_factor(y),
  sex = to_factor(efc$c161sex),
  dep = to_factor(efc$e42dep),
  barthel = efc$barthtot,
  education = to_factor(efc$c172code)
)

# set variable label for response
set_label(df$y) <- "High Negative Impact"

# fit model
m1 <- glm(y ~., data = df, family = binomial(link = "logit"))

Plotting estimates of generalized linear models

The simplest function call is just passing the model object as an argument. Estimates are then plotted in the order in which they appear in the model; see below for how to sort them, e.g. in descending order with the highest effect at the top.

plot_model(m1)

The “neutral” line, i.e. the vertical intercept that indicates no effect (x-axis position 1 for most glm’s and position 0 for most linear models), is drawn slightly thicker than the other grid lines. You can change the line color with the vline.color-argument.

plot_model(m1, vline.color = "red")

Sorting estimates

By default, the estimates are sorted in the same order as they were introduced into the model. Use sort.est = TRUE to sort estimates in descending order, from highest to lowest value.

plot_model(m1, sort.est = TRUE)

Another way to sort estimates is to use the order.terms-argument. This is a numeric vector, indicating the order of estimates in the plot. In the summary, we see that “sex2” is the first term, followed by the three dependency-categories (position 2-4), the Barthel-Index (5) and two levels for intermediate and high level of education (6 and 7).

summary(m1)
#> 
#> Call:
#> glm(formula = y ~ ., family = binomial(link = "logit"), data = df)
#> 
#> Deviance Residuals: 
#>     Min       1Q   Median       3Q      Max  
#> -2.2654  -0.9275   0.4610   0.9464   2.0215  
#> 
#> Coefficients:
#>              Estimate Std. Error z value Pr(>|z|)    
#> (Intercept)  0.700232   0.576715   1.214 0.224682    
#> sex2         0.649136   0.186186   3.486 0.000489 ***
#> dep2         0.485259   0.361498   1.342 0.179480    
#> dep3         1.125130   0.361977   3.108 0.001882 ** 
#> dep4         0.910194   0.441774   2.060 0.039368 *  
#> barthel     -0.029802   0.004732  -6.298 3.02e-10 ***
#> education2   0.226525   0.200298   1.131 0.258081    
#> education3   0.283600   0.249327   1.137 0.255346    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> (Dispersion parameter for binomial family taken to be 1)
#> 
#>     Null deviance: 1122.16  on 814  degrees of freedom
#> Residual deviance:  939.77  on 807  degrees of freedom
#>   (93 observations deleted due to missingness)
#> AIC: 955.77
#> 
#> Number of Fisher Scoring iterations: 4

Now we want the educational levels (6 and 7) first, then gender (1), followed by dependency (2-4) and finally the Barthel-Index (5). Use this order as a numeric vector for the order.terms-argument.

plot_model(m1, order.terms = c(6, 7, 1, 2, 3, 4, 5))

Estimates on the untransformed scale

By default, plot_model() automatically exponentiates coefficients, if appropriate (e.g. for models with log or logit link). You can explicitly prevent transformation by setting the transform-argument to NULL, or apply any transformation by using a character vector with the function name.

plot_model(m1, transform = NULL)

plot_model(m1, transform = "plogis")

Showing value labels

By default, just the dots and error bars are plotted. Use show.values = TRUE to show the value labels with the estimates values, and use show.p = FALSE to suppress the asterisks that indicate the significance level of the p-values. Use value.offset to adjust the relative positioning of value labels to the dots and lines.

plot_model(m1, show.values = TRUE, value.offset = .3)

Labelling the plot

As seen in the above examples, by default, the plotting-functions of sjPlot retrieve value and variable labels if the data is labelled, using the sjlabelled-package. If the data is not labelled, the variable names are used. In such cases, use the arguments title, axis.labels and axis.title to annotate the plot title and axes. If you want variable names instead of labels, even for labelled data, use "" as argument-value, e.g. axis.labels = "", or set auto.label to FALSE.

Furthermore, plot_model() applies case-conversion to all labels by default, using the snakecase-package. This converts labels into human-readable versions. Use case = NULL to turn case-conversion off, or refer to the package-vignette of the snakecase-package for further options.

data(iris)
m2 <- lm(Sepal.Length ~ Sepal.Width + Petal.Length + Species, data = iris)

# variable names as labels, but made "human readable"
# separating dots are removed
plot_model(m2)

# to use variable names even for labelled data
plot_model(m1, axis.labels = "", title = "my own title")
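The case and axis.title arguments mentioned above work in the same way; a brief sketch (the argument values here are only illustrative):

# keep original variable names (no case conversion) and set a custom axis title
plot_model(m2, case = NULL, axis.title = "Estimates", title = "Sepal Length")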

Pick or remove specific terms from plot

Use terms resp. rm.terms to select specific terms that should (not) be plotted.

# keep only coefficients sex2, dep2 and dep3
plot_model(m1, terms = c("sex2", "dep2", "dep3"))

# remove coefficients sex2, dep2 and dep3
plot_model(m1, rm.terms = c("sex2", "dep2", "dep3"))

Standardized estimates

For linear models, you can also plot standardized beta coefficients, using type = "std" or type = "std2". These two options differ in how the coefficients are standardized. type = "std2" also plots standardized beta values, however, standardization follows Gelman’s (2008) suggestion, rescaling the estimates by dividing them by two standard deviations instead of just one.

plot_model(m2, type = "std")

Bayesian models (fitted with Stan)

plot_model() also supports stan-models fitted with the rstanarm or brms packages. However, there are a few differences compared to the previous plot examples.

First, of course, there are no confidence intervals, but uncertainty intervals - high density intervals, to be precise.

Second, there’s not just one interval range, but an inner and outer probability. By default, the inner probability is fixed to .5 (50%), while the outer probability is specified via ci.lvl (which defaults to .89 (89%) for Bayesian models). However, you can also use the arguments prob.inner and prob.outer to define the interval boundaries.

Third, the point estimate is by default the median, but can also be another value, like mean. This can be specified with the bpe-argument.

if (require("rstanarm", quietly = TRUE)) {
  # make sure we apply a nice theme
  library(ggplot2)
  theme_set(theme_sjplot())
  
  data(mtcars)
  m <- stan_glm(mpg ~ wt + am + cyl + gear, data = mtcars, chains = 1)
  
  # default model
  plot_model(m)
  # same model, with mean point estimate, dot-style for point estimate
  # and different inner/outer probabilities of the HDI
  plot_model(
    m, 
    bpe = "mean",
    bpe.style = "dot",
    prob.inner = .4,
    prob.outer = .8
  )
}

Tweaking plot appearance

There are several options to customize the plot appearance:

  • The colors-argument either takes the name of a valid colorbrewer palette (see also the related vignette), "bw" or "gs" for black/white or greyscaled colors, or a string with a color name.
  • value.offset and value.size adjust the positioning and size of value labels, if shown.
  • dot.size and line.size change the size of dots and error bars.
  • vline.color changes the neutral “intercept” line.
  • width, alpha and scale are passed down to certain ggplot-geoms, like geom_errorbar() or geom_density_ridges().
plot_model(
  m1, 
  colors = "Accent", 
  show.values = TRUE,
  value.offset = .4,
  value.size = 4,
  dot.size = 3,
  line.size = 1.5,
  vline.color = "blue",
  width = 1.5
)

References

Gelman A (2008) Scaling regression inputs by dividing by two standard deviations. Statistics in Medicine 27: 2865–2873.

sjPlot/inst/doc/sjtitemanalysis.R0000644000176200001440000001305714150131450016615 0ustar liggesusers## ----echo = FALSE------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- knitr::opts_chunk$set(collapse = TRUE, comment = "#>") if (!requireNamespace("dplyr", quietly = TRUE) || !requireNamespace("sjmisc", quietly = TRUE) || !requireNamespace("parameters", quietly = TRUE) || !requireNamespace("psych", quietly = TRUE)) { knitr::opts_chunk$set(eval = FALSE) } else { knitr::opts_chunk$set(eval = TRUE) } ## ---- echo=FALSE, message=FALSE, warning=FALSE-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(sjPlot) library(sjmisc) library(dplyr) data(efc) # create data frame with COPE-index scale mydf <- dplyr::select(efc, contains("cop")) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_itemscale(mydf) ## 
----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- library(parameters) # Compute PCA on Cope-Index, and retrieve # factor indices for each COPE index variable pca <- parameters::principal_components(mydf) factor.groups <- parameters::closest_component(pca) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_itemscale(mydf, factor.groups) ## ----------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------- tab_itemscale(mydf, factor.groups, show.shapiro = TRUE, show.kurtosis = TRUE) sjPlot/inst/doc/plot_interactions.html0000644000176200001440000041163414150131412017676 0ustar liggesusers Plotting Interaction Effects of Regression Models

Plotting Interaction Effects of Regression Models

Daniel Lüdecke

2021-11-26

This document describes how to plot marginal effects of interaction terms from various regression models, using the plot_model() function. plot_model() is a generic plot-function, which accepts many model-objects, like lm, glm, lme, lmerMod etc.

plot_model() allows you to create various plot types, which can be defined via the type-argument. The default is type = "fe", which means that fixed effects (model coefficients) are plotted. To plot marginal effects of interaction terms, call plot_model() with:

  • type = "pred" to plot predicted values (marginal effects) for specific model terms, including interaction terms.
  • type = "eff", which is similar to type = "pred", however, discrete predictors are held constant at their proportions (not reference level). It internally calls via .
  • type = "emm", which is similar to type = "eff". It internally calls via .
  • type = "int" to plot marginal effects of interaction terms in a more convenient way.

plot_model() supports labelled data and automatically uses variable and value labels to annotate the plot. This works with most regression modelling functions.

Note: For marginal effects plots, sjPlot calls functions from the ggeffects-package. If you need more flexibility when creating marginal effects plots, consider directly using the ggeffects-package.
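For example, a minimal sketch of calling ggeffects directly, using the same interaction model that is fitted in the next section (ggpredict() computes the predictions, and its plot() method draws them):

library(sjPlot)
library(sjmisc)
library(ggeffects)

data(efc)
efc$c161sex <- to_factor(efc$c161sex)
fit <- lm(neg_c_7 ~ c12hour + barthtot * c161sex, data = efc)

# predictions for the interaction, computed and plotted directly with ggeffects
pred <- ggpredict(fit, terms = c("barthtot", "c161sex"))
plot(pred)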

Two-Way-Interactions

Note: To better understand the principle of plotting interaction terms, it might be helpful to read the vignette on marginal effects first.

To plot marginal effects of interaction terms, at least two model terms need to be specified (the terms that define the interaction) in the terms-argument, for which the effects are computed. To plot marginal effects for three-way-interactions, all three terms need to be specified in terms.

A convenient way to automatically plot interactions is type = "int", which scans the model formula for interaction terms and then uses these as terms-argument.

library(sjPlot)
library(sjmisc)
library(ggplot2)
data(efc)
theme_set(theme_sjplot())

# make categorical
efc$c161sex <- to_factor(efc$c161sex)

# fit model with interaction
fit <- lm(neg_c_7 ~ c12hour + barthtot * c161sex, data = efc)

plot_model(fit, type = "pred", terms = c("barthtot", "c161sex"))

For type = "int", no terms need to be specified. Note that this plot type automatically uses the first interaction term in the formula for the x-axis, while the second term is used as grouping factor. Furthermore, if continuous variables are used as second term, you can specify preset-values for this term with the mdrt.values-argument, which are then used as grouping levels.

In this example, the second term is a factor with two levels (male/female), so there is no need for choosing specific values for the moderator.

plot_model(fit, type = "int")

To switch the terms, in this example barthtot and c161sex, simply switch the order of these terms on the terms-argument and use type = "pred".

plot_model(fit, type = "pred", terms = c("c161sex", "barthtot [0, 100]"))

To switch the terms for plot-type type = "int", you need to re-fit the model and change the formula accordingly, i.e. using c161sex as first term in the interaction.

# fit model with interaction, switching terms in formula
fit <- lm(neg_c_7 ~ c12hour + c161sex * barthtot, data = efc)
plot_model(fit, type = "int")

By default, for continuous variables, the minimum and maximum values are chosen as grouping levels, which are 0 and 100 - that’s why the previous two plots are identical. You have other options as well, e.g. the mean-value and +/- 1 standard deviation (as suggested by Cohen and Cohen for continuous variables and popularized by Aiken and West 1991), which can be specified using mdrt.values.

plot_model(fit, type = "int", mdrt.values = "meansd")

Three-Way-Interactions

Since the terms-argument accepts up to three model terms, you can also compute marginal effects for a 3-way-interaction.

# fit model with 3-way-interaction
fit <- lm(neg_c_7 ~ c12hour * barthtot * c161sex, data = efc)

# select only levels 30, 50 and 70 from continuous variable Barthel-Index
plot_model(fit, type = "pred", terms = c("c12hour", "barthtot [30,50,70]", "c161sex"))

Again, type = "int" will automatically plot the interaction terms, however, using mdrt.values = "minmax" as default - in this case, the “levels” 0 and 100 from continuous variable barthtot are chosen by default.

plot_model(fit, type = "int")
#> [[1]]

#> 
#> [[2]]

#> 
#> [[3]]

#> 
#> [[4]]

References

Aiken and West (1991). Multiple Regression: Testing and Interpreting Interactions.

sjPlot/inst/doc/tab_model_estimates.html0000644000176200001440000055204014150131556020150 0ustar liggesusers Summary of Regression Models as HTML Table

Summary of Regression Models as HTML Table

Daniel Lüdecke

2021-11-26

tab_model() is the counterpart to plot_model(); however, instead of creating plots, tab_model() creates HTML tables that are displayed either in your IDE’s viewer-pane, in a web browser or in a knitr-markdown-document (like this vignette).

HTML is the only output format; you can’t (directly) create LaTeX or PDF output from tab_model() and related table-functions. However, it is possible to easily export the tables into Microsoft Word or LibreOffice Writer.
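One way to do this is to save the table to a file and open that file in the word processor. A minimal sketch, assuming the file argument of tab_model() (the model and file name here are only illustrative):

library(sjPlot)

m <- lm(mpg ~ hp + wt, data = mtcars)
# write the HTML table to a file that Word or LibreOffice Writer can open
tab_model(m, file = "model_table.html")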

This vignette shows how to create tables from regression models with tab_model(). There’s a dedicated vignette that demonstrates how to change the table layout and appearance with CSS.

Note! Due to the custom CSS, the layout of the table inside a knitr-document differs from the output in the viewer-pane and web browser!

# load package
library(sjPlot)
library(sjmisc)
library(sjlabelled)

# sample data
data("efc")
efc <- as_factor(efc, c161sex, c172code)

A simple HTML table from regression results

First, we fit two linear models to demonstrate the tab_model()-function.

m1 <- lm(barthtot ~ c160age + c12hour + c161sex + c172code, data = efc)
m2 <- lm(neg_c_7 ~ c160age + c12hour + c161sex + e17age, data = efc)

The simplest way of producing the table output is by passing the fitted model as a parameter. By default, estimates, confidence intervals (CI) and p-values (p) are reported. As a summary, the number of observations as well as the R-squared values are shown.

tab_model(m1)
  Total score BARTHEL INDEX
Predictors Estimates CI p
(Intercept) 87.15 77.96 – 96.34 <0.001
carer’age -0.21 -0.35 – -0.07 0.004
average number of hours
of care per week
-0.28 -0.32 – -0.24 <0.001
carer’s gender: Female -0.39 -4.49 – 3.71 0.850
carer’s level of
education: intermediate
level of education
1.37 -3.12 – 5.85 0.550
carer’s level of
education: high level of
education
-1.64 -7.22 – 3.93 0.564
Observations 821
R2 / R2 adjusted 0.271 / 0.266

Automatic labelling

As the sjPlot-package supports labelled data, the coefficients in the table are already labelled in this example. The name of the dependent variable(s) is used as main column header for each model. For non-labelled data, the coefficient names are shown.

data(mtcars)
m.mtcars <- lm(mpg ~ cyl + hp + wt, data = mtcars)
tab_model(m.mtcars)
  mpg
Predictors Estimates CI p
(Intercept) 38.75 35.09 – 42.41 <0.001
cyl -0.94 -2.07 – 0.19 0.098
hp -0.02 -0.04 – 0.01 0.140
wt -3.17 -4.68 – -1.65 <0.001
Observations 32
R2 / R2 adjusted 0.843 / 0.826

If factors are involved and auto.label = TRUE, "pretty" parameter names are used (see format_parameters()).

set.seed(2)
dat <- data.frame(
  y = runif(100, 0, 100),
  drug = as.factor(sample(c("nonsense", "useful", "placebo"), 100, TRUE)),
  group = as.factor(sample(c("control", "treatment"), 100, TRUE))
)

pretty_names <- lm(y ~ drug * group, data = dat)
tab_model(pretty_names)
  y
Predictors Estimates CI p
(Intercept) 66.84 52.97 – 80.71 <0.001
drug [placebo] -7.18 -28.25 – 13.89 0.500
drug [useful] -30.95 -53.08 – -8.82 0.007
group [treatment] -21.66 -40.13 – -3.19 0.022
drug [placebo] * group
[treatment]
4.15 -23.68 – 31.98 0.768
drug [useful] * group
[treatment]
30.85 2.38 – 59.33 0.034
Observations 100
R2 / R2 adjusted 0.116 / 0.069

Turn off automatic labelling

To turn off automatic labelling, use auto.label = FALSE, or provide an empty character vector for pred.labels and dv.labels.

tab_model(m1, auto.label = FALSE)
  barthtot
Predictors Estimates CI p
(Intercept) 87.15 77.96 – 96.34 <0.001
c160age -0.21 -0.35 – -0.07 0.004
c12hour -0.28 -0.32 – -0.24 <0.001
c161sex2 -0.39 -4.49 – 3.71 0.850
c172code2 1.37 -3.12 – 5.85 0.550
c172code3 -1.64 -7.22 – 3.93 0.564
Observations 821
R2 / R2 adjusted 0.271 / 0.266

Same for models with non-labelled data and factors.

tab_model(pretty_names, auto.label = FALSE)
  y
Predictors Estimates CI p
(Intercept) 66.84 52.97 – 80.71 <0.001
drugplacebo -7.18 -28.25 – 13.89 0.500
druguseful -30.95 -53.08 – -8.82 0.007
grouptreatment -21.66 -40.13 – -3.19 0.022
drugplacebo:grouptreatment 4.15 -23.68 – 31.98 0.768
druguseful:grouptreatment 30.85 2.38 – 59.33 0.034
Observations 100
R2 / R2 adjusted 0.116 / 0.069

More than one model

tab_model() can print multiple models at once, which are then printed side-by-side. Identical coefficients are matched in a row.

tab_model(m1, m2)
  Total score BARTHEL INDEX Negative impact with 7
items
Predictors Estimates CI p Estimates CI p
(Intercept) 87.15 77.96 – 96.34 <0.001 9.83 7.33 – 12.33 <0.001
carer’age -0.21 -0.35 – -0.07 0.004 0.01 -0.01 – 0.03 0.359
average number of hours
of care per week
-0.28 -0.32 – -0.24 <0.001 0.02 0.01 – 0.02 <0.001
carer’s gender: Female -0.39 -4.49 – 3.71 0.850 0.43 -0.15 – 1.01 0.147
carer’s level of
education: intermediate
level of education
1.37 -3.12 – 5.85 0.550
carer’s level of
education: high level of
education
-1.64 -7.22 – 3.93 0.564
elder’age 0.01 -0.03 – 0.04 0.741
Observations 821 879
R2 / R2 adjusted 0.271 / 0.266 0.067 / 0.063

Generalized linear models

For generalized linear models, the output is slightly adapted. Instead of Estimates, the column is named Odds Ratios, Incidence Rate Ratios etc., depending on the model. In this case, the coefficients are automatically converted (exponentiated). Furthermore, pseudo R-squared statistics are shown in the summary.

m3 <- glm(
  tot_sc_e ~ c160age + c12hour + c161sex + c172code, 
  data = efc,
  family = poisson(link = "log")
)

efc$neg_c_7d <- ifelse(efc$neg_c_7 < median(efc$neg_c_7, na.rm = TRUE), 0, 1)
m4 <- glm(
  neg_c_7d ~ c161sex + barthtot + c172code,
  data = efc,
  family = binomial(link = "logit")
)

tab_model(m3, m4)
  Services for elderly neg c 7 d
Predictors Incidence Rate Ratios CI p Odds Ratios CI p
(Intercept) 0.30 0.21 – 0.45 <0.001 6.54 3.66 – 11.96 <0.001
carer’age 1.01 1.01 – 1.02 <0.001
average number of hours
of care per week
1.00 1.00 – 1.00 <0.001
carer’s gender: Female 1.01 0.87 – 1.19 0.867 1.87 1.31 – 2.69 0.001
carer’s level of
education: intermediate
level of education
1.47 1.21 – 1.79 <0.001 1.23 0.84 – 1.82 0.288
carer’s level of
education: high level of
education
1.90 1.52 – 2.38 <0.001 1.37 0.84 – 2.23 0.204
Total score BARTHEL INDEX 0.97 0.96 – 0.97 <0.001
Observations 840 815
R2 Nagelkerke 0.106 0.191

Untransformed estimates on the linear scale

To show the untransformed estimates on the scale of the linear predictor, use transform = NULL.

tab_model(m3, m4, transform = NULL, auto.label = FALSE)
  tot_sc_e neg_c_7d
Predictors Log-Mean CI p Log-Odds CI p
(Intercept) -1.19 -1.58 – -0.80 <0.001 1.88 1.30 – 2.48 <0.001
c160age 0.01 0.01 – 0.02 <0.001
c12hour 0.00 0.00 – 0.00 <0.001
c161sex2 0.01 -0.15 – 0.18 0.867 0.63 0.27 – 0.99 0.001
c172code2 0.39 0.19 – 0.58 <0.001 0.21 -0.18 – 0.60 0.288
c172code3 0.64 0.42 – 0.87 <0.001 0.31 -0.17 – 0.80 0.204
barthtot -0.03 -0.04 – -0.03 <0.001
Observations 840 815
R2 Nagelkerke 0.106 0.191

More complex models

Other models, like hurdle- or zero-inflated models, also work with tab_model(). In this case, the zero inflation model is indicated in the table. Use show.zeroinf = FALSE to hide this part from the table.

library(pscl)
data("bioChemists")
m5 <- zeroinfl(art ~ fem + mar + kid5 + ment | kid5 + phd + ment, data = bioChemists)

tab_model(m5)
  art
Predictors Incidence Rate Ratios CI p
Count Model
(Intercept) 1.83 1.61 – 2.10 <0.001
fem [Women] 0.80 0.72 – 0.90 <0.001
mar [Married] 1.14 1.01 – 1.30 0.041
kid5 0.86 0.78 – 0.94 0.001
ment 1.02 1.01 – 1.02 <0.001
Zero-Inflated Model
(Intercept) 0.45 0.20 – 1.01 0.054
kid5 1.12 0.79 – 1.58 0.531
phd 1.02 0.78 – 1.33 0.881
ment 0.88 0.81 – 0.95 0.002
Observations 915
R2 / R2 adjusted 0.230 / 0.226
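
As mentioned above, the zero-inflation part can be suppressed with show.zeroinf = FALSE (output not shown here):

tab_model(m5, show.zeroinf = FALSE)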

You can combine any model in one table.

tab_model(m1, m3, m5, auto.label = FALSE, show.ci = FALSE)
  barthtot tot_sc_e art
Predictors Estimates p Incidence Rate Ratios p Incidence Rate Ratios p
(Intercept) 87.15 <0.001 0.30 <0.001
c160age -0.21 0.004 1.01 <0.001
c12hour -0.28 <0.001 1.00 <0.001
c161sex2 -0.39 0.850 1.01 0.867
c172code2 1.37 0.550 1.47 <0.001
c172code3 -1.64 0.564 1.90 <0.001
count_(Intercept) 1.83 <0.001
count_femWomen 0.80 <0.001
count_marMarried 1.14 0.041
count_kid5 0.86 0.001
count_ment 1.02 <0.001
Zero-Inflated Model
zero_(Intercept) 0.45 0.054
zero_kid5 1.12 0.531
zero_phd 1.02 0.881
zero_ment 0.88 0.002
Observations 821 840 915
R2 / R2 adjusted 0.271 / 0.266 0.106 0.230 / 0.226

Show or hide further columns

tab_model() has several arguments that allow you to show or hide specific columns in the output:

  • show.est to show/hide the column with model estimates.
  • show.ci to show/hide the column with confidence intervals.
  • show.se to show/hide the column with standard errors.
  • show.std to show/hide the column with standardized estimates (and their standard errors).
  • show.p to show/hide the column with p-values.
  • show.stat to show/hide the column with the coefficients’ test statistics.
  • show.df for linear mixed models: when p-values are based on degrees of freedom from the Kenward-Roger approximation, these degrees of freedom are shown.

Adding columns

In the following example, standard errors, standardized coefficients and test statistics are also shown.

tab_model(m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE)
  Total score BARTHEL INDEX
Predictors Estimates std. Error std. Beta standardized std. Error CI standardized CI Statistic p
(Intercept) 87.15 4.68 -0.01 0.08 77.96 – 96.34 -0.17 – 0.16 18.62 <0.001
carer’age -0.21 0.07 -0.09 0.03 -0.35 – -0.07 -0.16 – -0.03 -2.87 0.004
average number of hours
of care per week
-0.28 0.02 -0.48 0.03 -0.32 – -0.24 -0.54 – -0.42 -14.95 <0.001
carer’s gender: Female -0.39 2.09 -0.01 0.07 -4.49 – 3.71 -0.15 – 0.13 -0.19 0.850
carer’s level of
education: intermediate
level of education
1.37 2.28 0.05 0.08 -3.12 – 5.85 -0.11 – 0.20 0.60 0.550
carer’s level of
education: high level of
education
-1.64 2.84 -0.06 0.10 -7.22 – 3.93 -0.24 – 0.13 -0.58 0.564
Observations 821
R2 / R2 adjusted 0.271 / 0.266

Removing columns

In the following example, default columns are removed.

tab_model(m3, m4, show.ci = FALSE, show.p = FALSE, auto.label = FALSE)
  tot_sc_e neg_c_7d
Predictors Incidence Rate Ratios Odds Ratios
(Intercept) 0.30 6.54
c160age 1.01
c12hour 1.00
c161sex2 1.01 1.87
c172code2 1.47 1.23
c172code3 1.90 1.37
barthtot 0.97
Observations 840 815
R2 Nagelkerke 0.106 0.191

Removing and sorting columns

Another way to remove columns, which also allows reordering them, is the col.order-argument. This is a character vector, where each element indicates a column in the output. The value "est", for instance, indicates the estimates, while "std.est" is the column for standardized estimates, and so on.

By default, col.order contains all possible columns. All columns that should be shown (see previous tables, for example using show.se = TRUE to show standard errors, or show.std = TRUE to show standardized estimates) are then printed by default. Columns that are excluded from col.order are not shown, no matter whether the show.*-arguments are TRUE or FALSE. So if show.se = TRUE, but col.order does not contain the element "se", standard errors are not shown. On the other hand, if show.est = FALSE, the column with estimates is not shown, even if col.order includes the element "est".

In summary, col.order can be used to exclude columns from the table and to change the order of columns.

tab_model(
  m1, show.se = TRUE, show.std = TRUE, show.stat = TRUE,
  col.order = c("p", "stat", "est", "std.se", "se", "std.est")
)
  Total score BARTHEL INDEX
Predictors p Statistic Estimates standardized std. Error std. Error std. Beta
(Intercept) <0.001 18.62 87.15 0.08 4.68 -0.01
carer’age 0.004 -2.87 -0.21 0.03 0.07 -0.09
average number of hours
of care per week
<0.001 -14.95 -0.28 0.03 0.02 -0.48
carer’s gender: Female 0.850 -0.19 -0.39 0.07 2.09 -0.01
carer’s level of
education: intermediate
level of education
0.550 0.60 1.37 0.08 2.28 0.05
carer’s level of
education: high level of
education
0.564 -0.58 -1.64 0.10 2.84 -0.06
Observations 821
R2 / R2 adjusted 0.271 / 0.266

Collapsing columns

With collapse.ci and collapse.se, the columns for confidence intervals and standard errors can be collapsed into one column together with the estimates. Sometimes this table layout is required.

tab_model(m1, collapse.ci = TRUE)
  Total score BARTHEL INDEX
Predictors Estimates p
(Intercept) 87.15
(77.96 – 96.34)
<0.001
carer’age -0.21
(-0.35 – -0.07)
0.004
average number of hours
of care per week
-0.28
(-0.32 – -0.24)
<0.001
carer’s gender: Female -0.39
(-4.49 – 3.71)
0.850
carer’s level of
education: intermediate
level of education
1.37
(-3.12 – 5.85)
0.550
carer’s level of
education: high level of
education
-1.64
(-7.22 – 3.93)
0.564
Observations 821
R2 / R2 adjusted 0.271 / 0.266
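
collapse.se works the same way for standard errors; a short sketch (output not shown), assuming standard errors are requested via show.se = TRUE:

tab_model(m1, show.se = TRUE, collapse.se = TRUE)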

Defining own labels

There are different options to change the labels of the column headers or coefficients, e.g. with:

  • pred.labels to change the names of the coefficients in the Predictors column. Note that the length of pred.labels must exactly match the number of predictors in the Predictors column.
  • dv.labels to change the names of the model columns, which are labelled with the variable labels / names from the dependent variables.
  • Furthermore, there are various string.*-arguments to change the names of the column headings.
tab_model(
  m1, m2, 
  pred.labels = c("Intercept", "Age (Carer)", "Hours per Week", "Gender (Carer)",
                  "Education: middle (Carer)", "Education: high (Carer)", 
                  "Age (Older Person)"),
  dv.labels = c("First Model", "M2"),
  string.pred = "Coefficient",
  string.ci = "Conf. Int (95%)",
  string.p = "P-Value"
)
  First Model M2
Coefficient Estimates Conf. Int (95%) P-Value Estimates Conf. Int (95%) P-Value
Intercept 87.15 77.96 – 96.34 <0.001 9.83 7.33 – 12.33 <0.001
Age (Carer) -0.21 -0.35 – -0.07 0.004 0.01 -0.01 – 0.03 0.359
Hours per Week -0.28 -0.32 – -0.24 <0.001 0.02 0.01 – 0.02 <0.001
Gender (Carer) -0.39 -4.49 – 3.71 0.850 0.43 -0.15 – 1.01 0.147
Education: middle (Carer) 1.37 -3.12 – 5.85 0.550
Education: high (Carer) -1.64 -7.22 – 3.93 0.564
Age (Older Person) 0.01 -0.03 – 0.04 0.741
Observations 821 879
R2 / R2 adjusted 0.271 / 0.266 0.067 / 0.063

Including reference level of categorical predictors

By default, for categorical predictors, the variable names and the categories for regression coefficients are shown in the table output.

library(glmmTMB)
data("Salamanders")
model <- glm(
  count ~ spp + Wtemp + mined + cover,
  family = poisson(),
  data = Salamanders
)

tab_model(model)
  count
Predictors Incidence Rate Ratios CI p
(Intercept) 0.22 0.17 – 0.29 <0.001
spp [PR] 0.25 0.16 – 0.38 <0.001
spp [DM] 1.26 0.98 – 1.62 0.074
spp [EC-A] 0.46 0.33 – 0.64 <0.001
spp [EC-L] 1.86 1.48 – 2.36 <0.001
spp [DES-L] 1.97 1.57 – 2.49 <0.001
spp [DF] 1.08 0.83 – 1.41 0.549
Wtemp 1.00 0.93 – 1.08 0.977
mined [no] 9.97 7.91 – 12.69 <0.001
cover 0.79 0.73 – 0.86 <0.001
Observations 644
R2 Nagelkerke 0.758

You can include the reference level for categorical predictors by setting show.reflvl = TRUE.

tab_model(model, show.reflvl = TRUE)
  count
Predictors Incidence Rate Ratios CI p
(Intercept) 0.22 0.17 – 0.29 <0.001
Wtemp 1.00 0.93 – 1.08 0.977
cover 0.79 0.73 – 0.86 <0.001
GP Reference
PR 0.25 0.16 – 0.38 <0.001
DM 1.26 0.98 – 1.62 0.074
EC-A 0.46 0.33 – 0.64 <0.001
EC-L 1.86 1.48 – 2.36 <0.001
DES-L 1.97 1.57 – 2.49 <0.001
DF 1.08 0.83 – 1.41 0.549
yes Reference
no 9.97 7.91 – 12.69 <0.001
Observations 644
R2 Nagelkerke 0.758

To show variable names, categories and include the reference level, also set prefix.labels = "varname".

tab_model(model, show.reflvl = TRUE, prefix.labels = "varname")
  count
Predictors Incidence Rate Ratios CI p
(Intercept) 0.22 0.17 – 0.29 <0.001
Wtemp 1.00 0.93 – 1.08 0.977
cover 0.79 0.73 – 0.86 <0.001
spp: GP Reference
spp: PR 0.25 0.16 – 0.38 <0.001
spp: DM 1.26 0.98 – 1.62 0.074
spp: EC-A 0.46 0.33 – 0.64 <0.001
spp: EC-L 1.86 1.48 – 2.36 <0.001
spp: DES-L 1.97 1.57 – 2.49 <0.001
spp: DF 1.08 0.83 – 1.41 0.549
mined: yes Reference
mined: no 9.97 7.91 – 12.69 <0.001
Observations 644
R2 Nagelkerke 0.758

Style of p-values

You can change how p-values are displayed with the argument p.style. With p.style = "stars", the p-values are indicated by asterisks in the table.

tab_model(m1, m2, p.style = "stars")
  Total score BARTHEL INDEX Negative impact with 7
items
Predictors Estimates CI Estimates CI
(Intercept) 87.15 *** 77.96 – 96.34 9.83 *** 7.33 – 12.33
carer’age -0.21 ** -0.35 – -0.07 0.01 -0.01 – 0.03
average number of hours
of care per week
-0.28 *** -0.32 – -0.24 0.02 *** 0.01 – 0.02
carer’s gender: Female -0.39 -4.49 – 3.71 0.43 -0.15 – 1.01
carer’s level of
education: intermediate
level of education
1.37 -3.12 – 5.85
carer’s level of
education: high level of
education
-1.64 -7.22 – 3.93
elder’age 0.01 -0.03 – 0.04
Observations 821 879
R2 / R2 adjusted 0.271 / 0.266 0.067 / 0.063
  * p<0.05   ** p<0.01   *** p<0.001

Another option would be scientific notation, using p.style = "scientific", which can also be combined with digits.p.

tab_model(m1, m2, p.style = "scientific", digits.p = 2)
  Total score BARTHEL INDEX Negative impact with 7
items
Predictors Estimates CI p Estimates CI p
(Intercept) 87.15 77.96 – 96.34 9.33e-65 9.83 7.33 – 12.33 3.11e-14
carer’age -0.21 -0.35 – -0.07 4.18e-03 0.01 -0.01 – 0.03 3.59e-01
average number of hours
of care per week
-0.28 -0.32 – -0.24 7.77e-45 0.02 0.01 – 0.02 2.69e-11
carer’s gender: Female -0.39 -4.49 – 3.71 8.50e-01 0.43 -0.15 – 1.01 1.47e-01
carer’s level of
education: intermediate
level of education
1.37 -3.12 – 5.85 5.50e-01
carer’s level of
education: high level of
education
-1.64 -7.22 – 3.93 5.64e-01
elder’age 0.01 -0.03 – 0.04 7.41e-01
Observations 821 879
R2 / R2 adjusted 0.271 / 0.266 0.067 / 0.063

Automatic matching for named vectors

Another way to easily assign labels is to use named vectors. In this case, it doesn't matter if pred.labels has more labels than there are coefficients in the model(s), or in which order the labels are passed to tab_model(). The only requirement is that the labels' names equal the coefficient names as they appear in the summary()-output.

# example, coefficients are "c161sex2" or "c172code3"
summary(m1)
#> 
#> Call:
#> lm(formula = barthtot ~ c160age + c12hour + c161sex + c172code, 
#>     data = efc)
#> 
#> Residuals:
#>     Min      1Q  Median      3Q     Max 
#> -75.144 -14.944   4.401  18.661  72.393 
#> 
#> Coefficients:
#>             Estimate Std. Error t value Pr(>|t|)    
#> (Intercept) 87.14994    4.68009  18.621  < 2e-16 ***
#> c160age     -0.20716    0.07211  -2.873  0.00418 ** 
#> c12hour     -0.27883    0.01865 -14.950  < 2e-16 ***
#> c161sex2    -0.39402    2.08893  -0.189  0.85044    
#> c172code2    1.36596    2.28440   0.598  0.55004    
#> c172code3   -1.64045    2.84037  -0.578  0.56373    
#> ---
#> Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#> 
#> Residual standard error: 25.35 on 815 degrees of freedom
#>   (87 observations deleted due to missingness)
#> Multiple R-squared:  0.2708, Adjusted R-squared:  0.2664 
#> F-statistic: 60.54 on 5 and 815 DF,  p-value: < 2.2e-16

pl <- c(
  `(Intercept)` = "Intercept",
  e17age = "Age (Older Person)",
  c160age = "Age (Carer)", 
  c12hour = "Hours per Week", 
  barthtot = "Barthel-Index",
  c161sex2 = "Gender (Carer)",
  c172code2 = "Education: middle (Carer)", 
  c172code3 = "Education: high (Carer)",
  a_non_used_label = "We don't care"
)
 
tab_model(
  m1, m2, m3, m4, 
  pred.labels = pl, 
  dv.labels = c("Model1", "Model2", "Model3", "Model4"),
  show.ci = FALSE, 
  show.p = FALSE, 
  transform = NULL
)
  Model1 Model2 Model3 Model4
Predictors Estimates Estimates Log-Mean Log-Odds
Intercept 87.15 9.83 -1.19 1.88
Age (Carer) -0.21 0.01 0.01
Hours per Week -0.28 0.02 0.00
Gender (Carer) -0.39 0.43 0.01 0.63
Education: middle (Carer) 1.37 0.39 0.21
Education: high (Carer) -1.64 0.64 0.31
Age (Older Person) 0.01
Barthel-Index -0.03
Observations 821 879 840 815
R2 / R2 adjusted 0.271 / 0.266 0.067 / 0.063 0.106 0.191

Keep or remove coefficients from the table

Using the terms- or rm.terms-argument allows us to explicitly show or remove specific coefficients from the table output.

tab_model(m1, terms = c("c160age", "c12hour"))
  Total score BARTHEL INDEX
Predictors Estimates CI p
carer’age -0.21 -0.35 – -0.07 0.004
average number of hours
of care per week
-0.28 -0.32 – -0.24 <0.001
Observations 821
R2 / R2 adjusted 0.271 / 0.266

Note that the names of terms to keep or remove should match the coefficient names. For categorical predictors, one example would be:

tab_model(m1, rm.terms = c("c172code2", "c161sex2"))
  Total score BARTHEL INDEX
Predictors Estimates CI p
(Intercept) 87.15 77.96 – 96.34 <0.001
carer’age -0.21 -0.35 – -0.07 0.004
average number of hours
of care per week
-0.28 -0.32 – -0.24 <0.001
carer’s level of
education: high level of
education
-1.64 -7.22 – 3.93 0.564
Observations 821
R2 / R2 adjusted 0.271 / 0.266
sjPlot/inst/doc/sjtitemanalysis.Rmd0000644000176200001440000001032513662304072017142 0ustar liggesusers
---
title: "Item Analysis of a Scale or an Index"
author: "Daniel Lüdecke"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
vignette: >
  %\VignetteIndexEntry{Item Analysis of a Scale or an Index}
  %\VignetteEngine{knitr::rmarkdown}
  %\VignetteEncoding{UTF-8}
---

```{r echo = FALSE}
knitr::opts_chunk$set(collapse = TRUE, comment = "#>")
if (!requireNamespace("dplyr", quietly = TRUE) ||
    !requireNamespace("sjmisc", quietly = TRUE) ||
    !requireNamespace("parameters", quietly = TRUE) ||
    !requireNamespace("psych", quietly = TRUE)) {
  knitr::opts_chunk$set(eval = FALSE)
} else {
  knitr::opts_chunk$set(eval = TRUE)
}
```

This document shows examples for using the `tab_itemscale()` function of the sjPlot package.

## Performing an item analysis of a scale or index

This function performs an item analysis with statistics that are useful for scale or index development. The following statistics are computed for each variable (column) of a data frame:

* percentage of missing values
* mean value
* standard deviation
* skew
* item difficulty
* item discrimination
* Cronbach's Alpha if item was removed from scale
* mean (or average) inter-item-correlation

Optionally, the following statistics can be computed as well:

* kurtosis
* Shapiro-Wilk Normality Test

If the argument `factor.groups` is _not_ `NULL`, the data frame will be split into groups, assuming that `factor.groups` indicates those columns (variables) of the data frame that belong to a certain factor (see, for instance, the return value of `tab_pca()` or `parameters::principal_components()` as an example for retrieving factor groups for a scale). This is useful when you have performed a principal component analysis or factor analysis as a first step, and now want to see whether the found factors / components represent a scale or index score.

To demonstrate this function, we first need some data:

```{r, echo=FALSE, message=FALSE, warning=FALSE}
library(sjPlot)
library(sjmisc)
library(dplyr)
data(efc)
# create data frame with COPE-index scale
mydf <- dplyr::select(efc, contains("cop"))
```

## Index score with one component

The simplest function call is just passing the data frame as argument. In this case, the function assumes that all variables of the data frame belong to one factor only.

```{r}
tab_itemscale(mydf)
```

To interpret the output, we may consider the following values as rules of thumb for indicating a reliable scale:

* item difficulty should range between 0.2 and 0.8. The ideal value is p+(1-p)/2 (which mostly is between 0.5 and 0.8)
* for item discrimination, acceptable values are 0.2 or higher; the closer to 1 the better
* in case the total Cronbach's Alpha value is below the acceptable cut-off of 0.7 (mostly if an index has few items), the mean inter-item-correlation is an alternative measure to indicate acceptability; the satisfactory range lies between 0.2 and 0.4

## Index score with more than one component

The items of the COPE index used for our example do not represent a single factor. We can check this, for instance, with a principal component analysis. If you know which variable belongs to which factor (i.e. which variable is part of which component), you can pass a numeric vector with these group indices to the argument `factor.groups`. In this case, the data frame is divided into the components specified by `factor.groups`, and each component (or factor) is analysed.
```{r}
library(parameters)
# Compute PCA on Cope-Index, and retrieve
# factor indices for each COPE index variable
pca <- parameters::principal_components(mydf)
factor.groups <- parameters::closest_component(pca)
```

The PCA extracted two components. Now `tab_itemscale()` ...

1. performs an item analysis on both components, showing whether each of them is a reliable and useful scale or index score
2. builds an index of each component, by standardizing each scale
3. and adds a component-correlation-matrix, to see whether the index scores (which are based on the components) are highly correlated or not.

```{r}
tab_itemscale(mydf, factor.groups)
```

## Adding further statistics

```{r}
tab_itemscale(mydf, factor.groups, show.shapiro = TRUE, show.kurtosis = TRUE)
```
sjPlot/inst/doc/plot_marginal_effects.Rmd0000644000176200001440000001701014147735034020251 0ustar liggesusers
---
title: "Plotting Marginal Effects of Regression Models"
author: "Daniel Lüdecke"
date: "`r Sys.Date()`"
output: rmarkdown::html_vignette
params:
  EVAL: !r identical(Sys.getenv("NOT_CRAN"), "true")
---

```{r set-options, echo = FALSE}
knitr::opts_chunk$set(
  collapse = TRUE, comment = "#>",
  message = FALSE, dev = "png",
  fig.width = 7, fig.height = 3.5,
  warning = FALSE, eval = TRUE
  # eval = if (isTRUE(exists("params"))) params$EVAL else FALSE
)
options(width = 800, tibble.width = Inf)
if (!requireNamespace("sjmisc", quietly = TRUE) ||
    !requireNamespace("splines", quietly = TRUE) ||
    !requireNamespace("ggplot2", quietly = TRUE) ||
    !requireNamespace("sjlabelled", quietly = TRUE)) {
  knitr::opts_chunk$set(eval = FALSE)
}
```

This document describes how to plot marginal effects of various regression models, using the `plot_model()` function. `plot_model()` is a generic plot-function, which accepts many model-objects, like `lm`, `glm`, `lme`, `lmerMod` etc.

`plot_model()` allows creating various plot types, which can be defined via the `type`-argument. The default is `type = "fe"`, which means that fixed effects (model coefficients) are plotted. To plot marginal effects, call `plot_model()` with:

* `type = "pred"` to plot predicted values (marginal effects) for specific model terms.
* `type = "eff"`, which is similar to `type = "pred"`, however, discrete predictors are held constant at their proportions (not reference level). It internally calls \code{\link[effects]{Effect}} via \code{\link[ggeffects]{ggeffect}}.
* `type = "emm"`, which is similar to `type = "eff"`. It internally calls \code{\link[emmeans]{emmeans}} via \code{\link[ggeffects]{ggemmeans}}.
* `type = "int"` to plot marginal effects of interaction terms.

To plot marginal effects of regression models, at least one model term needs to be specified for which the effects are computed. It is also possible to compute marginal effects for model terms, grouped by the levels of another model's predictor. The function also allows plotting marginal effects for two- or three-way-interactions; however, this is shown in a different vignette.

`plot_model()` supports [labelled data](https://cran.r-project.org/package=sjlabelled) and automatically uses variable and value labels to annotate the plot. This works with most regression modelling functions.

***Note:** For marginal effects plots, **sjPlot** calls functions from the [**ggeffects-package**](https://strengejacke.github.io/ggeffects/).
If you need more flexibility when creating marginal effects plots, consider directly using the **ggeffects**-package.*

# Marginal effects

`plot_model(type = "pred")` computes predicted values for all possible levels and values from a model's predictors. In the simplest case, a fitted model is passed as first argument, followed by the `type` argument and the term in question as `terms` argument:

```{r}
library(sjPlot)
library(ggplot2)
data(efc)
theme_set(theme_sjplot())

fit <- lm(barthtot ~ c12hour + neg_c_7 + c161sex + c172code, data = efc)
plot_model(fit, type = "pred", terms = "c12hour")
```

The plot shows the predicted values for the response at each value from the term _c12hour_.

## Marginal effects for different groups

The `terms`-argument accepts up to three model terms, where the second and third term indicate grouping levels. This allows predictions for the term in question at different levels for other model terms:

```{r}
plot_model(fit, type = "pred", terms = c("c12hour", "c172code"))
```

A second grouping structure can be defined, which will create a plot with multiple panels in grid layout:

```{r}
plot_model(fit, type = "pred", terms = c("c12hour", "c172code", "c161sex"))
```

## Marginal effects at specific values or levels

The `terms`-argument not only defines the model terms of interest, but each model term _that defines the grouping structure_ can be limited to certain values. This allows computing and plotting marginal effects for terms at specific values only. To define these values, put them in square brackets directly after the term name: `terms = c("c12hour [30, 50, 80]", "c172code [1,3]")`

```{r}
plot_model(fit, type = "pred", terms = c("c12hour [30, 50, 80]", "c172code [1,3]"))
```

Note that in the above plot, although only the values 30, 50 and 80 are selected from _c12hour_, the continuous scale automatically adds panel grids every 5 units along the x-axis.

Defining your own values is especially useful when variables are, for instance, log-transformed. `plot_model()` then typically only uses the range of the log-transformed variable, which is in most cases not what we want. In such situations, specify the range in the `terms`-argument.

```{r}
data(mtcars)
mpg_model <- lm(mpg ~ log(hp), data = mtcars)

# x-values and predictions based on the log(hp)-values
plot_model(mpg_model, type = "pred", terms = "hp")

# x-values and predictions based on hp-values from 50 to 150
plot_model(mpg_model, type = "pred", terms = "hp [50:150]")
```

The brackets in the `terms`-argument also accept the name of a valid function, to (back-)transform predicted values. In this example, an alternative would be to specify that values should be exponentiated, which is indicated by `[exp]` in the `terms`-argument:

```{r}
# x-values and predictions based on exponentiated hp-values
plot_model(mpg_model, type = "pred", terms = "hp [exp]")
```

## Polynomial terms and splines

The function also works for models with polynomial terms or splines. The following code reproduces the plot from `?splines::bs`:

```{r}
library(splines)
data(women)

fm1 <- lm(weight ~ bs(height, df = 5), data = women)
plot_model(fm1, type = "pred", terms = "height")
```

## Different constant values for factors

Model predictions are based on all possible combinations of the model terms, which are - roughly speaking - created using `expand.grid()`. For the terms in question, all values are used for combinations.
All other model predictors that are _not_ specified in the `terms`-argument are held constant (which is achieved with `sjstats::typical_value()`). By default, continuous variables are set to their mean, while factors are set to their reference level.

```{r}
data(efc)
efc$c172code <- sjlabelled::as_factor(efc$c172code)
fit <- lm(neg_c_7 ~ c12hour + c172code, data = efc)

# reference category is used for "c172code", i.e. c172code
# used the first level as value for predictions
plot_model(fit, type = "pred", terms = "c12hour")
```

However, one may want to set factors to their _proportions_ instead of the reference level. E.g., a factor _gender_ with value 0 for female and value 1 for male persons would be set to `0` when marginal effects are computed with `type = "pred"`. But if 40% of the sample are female persons, another possibility to hold this factor constant is to use the value `.4` (reflecting the proportion of 40%). If this is required, use `type = "eff"`, which internally does not call `predict()` to compute marginal effects, but rather `effects::effect()`.

```{r}
# proportion is used for "c172code", i.e. it is set to
# mean(sjlabelled::as_numeric(efc$c172code), na.rm = T),
# which is about 1.9715
plot_model(fit, type = "eff", terms = "c12hour")
```

# Interaction terms

Plotting interaction terms is described in a [separate vignette](plot_interactions.html).
sjPlot/inst/CITATION0000644000176200001440000000054513662304072013646 0ustar liggesusers
year <- sub("-.*", "", meta$Date)
note <- sprintf("R package version %s", meta$Version)

bibentry(bibtype="manual",
         title = "sjPlot: Data Visualization for Statistics in Social Science",
         author = person("Daniel", "Lüdecke"),
         year = year,
         note = note,
         url = "https://CRAN.R-project.org/package=sjPlot")